# Functional data

This notebook links various functional data layers to enclosed tessellation (ET) cells across Great Britain (GB).

## Population estimates

In [44]:
import warnings

import geopandas as gpd
import pandas as pd
import numpy as np
import tobler
from time import time
import xarray
import rioxarray
import rasterstats

from dask.distributed import Client, LocalCluster, as_completed

In [45]:
# Silence any warning whose message matches "initial implementation of Parquet".
# NOTE(review): presumably emitted by the GeoPandas Parquet reader/writer used
# throughout this notebook — confirm.
warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

In [2]:
# Source polygons carrying a `population` count per census unit (`code`),
# used as the source layer of the areal interpolation further down.
population_est = gpd.read_parquet(
    "../../urbangrammar_samba/functional_data/population_estimates/gb_population_estimates.pq"
)

In [3]:
# Preview the population estimates loaded above (code, population, geometry).
population_est

Unnamed: 0,code,population,geometry
0,E00000095,508,"POLYGON ((549375.280 184970.971, 549375.032 18..."
1,E00000096,322,"POLYGON ((549003.992 184933.133, 549007.974 18..."
2,E00000097,348,"POLYGON ((548895.389 184911.808, 548897.030 18..."
3,E00000098,305,"POLYGON ((548320.415 184671.916, 548317.197 18..."
4,E00000001,251,"POLYGON ((532303.125 181877.594, 532301.345 18..."
...,...,...,...
6971,S01013477,638,"POLYGON ((308686.000 672220.000, 308688.000 67..."
6972,S01013478,809,"POLYGON ((307423.889 672579.756, 307424.594 67..."
6973,S01013479,758,"POLYGON ((308478.000 672440.000, 308485.000 67..."
6974,S01013480,703,"POLYGON ((308734.000 672598.000, 308743.000 67..."


In [15]:
# Load the first tessellation chunk (tess_0) to prototype each step on a
# single chunk before running it over all of them.
chunk = gpd.read_parquet(
    "../../urbangrammar_samba/spatial_signatures/tessellation/tess_0.pq"
)

In [8]:
# Preview the chunk: one row per `hindex` with a `tessellation` geometry and a
# `buildings` geometry.
chunk

Unnamed: 0,hindex,tessellation,buildings
0,c000e109777t0000,"POLYGON ((336287.376 427055.178, 336286.745 42...","POLYGON ((336281.920 427085.170, 336285.250 42..."
1,c000e109777t0001,"POLYGON ((336192.633 427056.666, 336192.607 42...","POLYGON ((336196.990 427062.730, 336209.420 42..."
2,c000e109777t0002,"POLYGON ((336202.056 427028.254, 336199.483 42...","POLYGON ((336198.720 427052.340, 336200.960 42..."
3,c000e109777t0003,"POLYGON ((336194.678 427020.851, 336196.952 42...","POLYGON ((336213.400 427028.670, 336214.800 42..."
4,c000e109777t0004,"POLYGON ((336220.377 427052.195, 336218.829 42...","POLYGON ((336248.750 427066.160, 336246.990 42..."
...,...,...,...
114648,c000e599810t0000,"POLYGON Z ((348790.934 452623.250 0.000, 34878...",
114649,c000e094707t0000,"POLYGON ((352410.100 452567.740, 352408.620 45...",
114650,c000e599602t0000,"POLYGON Z ((352035.703 452585.968 0.000, 35204...",
114651,c000e599795t0000,"POLYGON Z ((348790.934 452623.250 0.000, 34879...",


In [5]:
# Bounding box of the chunk's active geometry; used in the next cell to
# spatially subset the population polygons with `.cx` before interpolating.
xmin, ymin, xmax, ymax = chunk.total_bounds

In [6]:
%%time
ests = tobler.area_weighted.area_interpolate(population_est.cx[xmin:xmax, ymin:ymax], chunk.set_geometry("buildings"), extensive_variables=['population'])

CPU times: user 6.69 s, sys: 18 ms, total: 6.71 s
Wall time: 6.71 s


In [9]:
# Sanity check: largest interpolated population assigned to a single target
# (building) geometry in this chunk.
ests.population.max()

373.49790543317795

In [10]:
# Interpolate population onto building geometries for every tessellation
# chunk (tess_0 ... tess_102) and store the result per chunk.
for chunk_id in range(103):
    s = time()
    # Use a dedicated name for the per-iteration frame: it only holds `hindex`
    # and `buildings`, and rebinding the notebook-level `chunk` would break the
    # night-lights cells below, which read `chunk.tessellation`, when the
    # notebook is run top to bottom.
    chunk_buildings = gpd.read_parquet(
        f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq",
        columns=["hindex", "buildings"],
    ).set_geometry("buildings")
    xmin, ymin, xmax, ymax = chunk_buildings.total_bounds
    # Only the population polygons within the chunk's bounding box are passed
    # as the interpolation source.
    ests = tobler.area_weighted.area_interpolate(
        population_est.cx[xmin:xmax, ymin:ymax],
        chunk_buildings,
        extensive_variables=['population'],
    )
    # Pair the interpolated values with `hindex` positionally; this relies on
    # the output rows following the order of the target geometries.
    pop = pd.DataFrame({'hindex': chunk_buildings.hindex.values, "population": ests.population.values})
    pop.to_parquet(f"../../urbangrammar_samba/spatial_signatures/functional/population/pop_{chunk_id}")
    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")

Chunk 0 processed sucessfully in 6.84471321105957 seconds.
Chunk 1 processed sucessfully in 8.95691466331482 seconds.
Chunk 2 processed sucessfully in 7.748736381530762 seconds.
Chunk 3 processed sucessfully in 8.444145202636719 seconds.
Chunk 4 processed sucessfully in 12.662679195404053 seconds.
Chunk 5 processed sucessfully in 33.37630248069763 seconds.
Chunk 6 processed sucessfully in 26.04380989074707 seconds.
Chunk 7 processed sucessfully in 8.81340217590332 seconds.
Chunk 8 processed sucessfully in 9.171989679336548 seconds.
Chunk 9 processed sucessfully in 7.381581783294678 seconds.
Chunk 10 processed sucessfully in 8.442428827285767 seconds.
Chunk 11 processed sucessfully in 8.21262001991272 seconds.
Chunk 12 processed sucessfully in 13.600036859512329 seconds.
Chunk 13 processed sucessfully in 9.243984937667847 seconds.
Chunk 14 processed sucessfully in 10.093465805053711 seconds.
Chunk 15 processed sucessfully in 7.550076246261597 seconds.
Chunk 16 processed sucessfully in 7

## Night lights

In [24]:
# Open the night-lights GeoTIFF through rioxarray: `xarray.open_rasterio` is
# deprecated (and no longer available in current xarray releases), and
# `rioxarray.open_rasterio` is its replacement.
nl = rioxarray.open_rasterio("../../urbangrammar_samba/functional_data/employment/night_lights_osgb.tif")
# Restrict the raster to the bounding box of the chunk so that only the
# relevant window is loaded into memory.
nl_clip = nl.rio.clip_box(*chunk.total_bounds)
# Plain array (indexed by band first, see `arr[0]` below) and the affine
# transform of the clipped window, as expected by rasterstats.
arr = nl_clip.values
affine = nl_clip.rio.transform()

In [25]:
%%time 
stats_nl = rasterstats.zonal_stats(
    chunk.tessellation, 
    raster=arr[0],
    affine=affine,
    stats=['mean'],
    all_touched=True,
    nodata = np.nan,
)

CPU times: user 1min 42s, sys: 6.97 s, total: 1min 49s
Wall time: 1min 44s


In [26]:
# Inspect the raw result: a list with one {'mean': value} dict per geometry.
stats_nl

[{'mean': 5.210000038146973},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 4.674999952316284},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 5.210000038146973},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684},
 {'mean': 3.5999999046325684}

In [20]:
# Local dask cluster with single-threaded worker processes; `workers` is also
# used below to cap the number of chunks processed concurrently.
workers = 8
cluster = LocalCluster(n_workers=workers, threads_per_worker=1)
client = Client(cluster)
client

0,1
Client  Scheduler: tcp://127.0.0.1:38779  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 134.91 GB


In [32]:
def _night_lights(chunk_id):
    """Compute the mean night-lights value of every tessellation cell in a chunk.

    Reads the `hindex` and `tessellation` columns of ``tess_{chunk_id}.pq``,
    clips the night-lights raster to the chunk's bounding box, computes the
    zonal mean per tessellation cell and writes `hindex` and `night_lights`
    to ``nl_{chunk_id}``.

    Parameters
    ----------
    chunk_id : int
        Identifier interpolated into the input and output file names.

    Returns
    -------
    str
        Message reporting the chunk and the elapsed time in seconds.
    """
    # Imported locally so the module (and the `.rio` accessor it registers)
    # is loaded in the process that executes the task.
    import rioxarray

    s = time()

    chunk = gpd.read_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq", columns=["hindex", "tessellation"])
    # `xarray.open_rasterio` is deprecated (no longer available in current
    # xarray releases); `rioxarray.open_rasterio` is its replacement. The
    # context manager closes the raster handle once the clipped window has
    # been read, so handles do not accumulate on long-lived workers.
    with rioxarray.open_rasterio("../../urbangrammar_samba/functional_data/employment/night_lights_osgb.tif") as nl:
        nl_clip = nl.rio.clip_box(*chunk.total_bounds)
        arr = nl_clip.values
        affine = nl_clip.rio.transform()
    stats_nl = rasterstats.zonal_stats(
        chunk.tessellation,
        raster=arr[0],  # first band of the clipped raster
        affine=affine,
        stats=['mean'],
        all_touched=True,  # count every pixel touched by a cell
        nodata=np.nan,
    )
    chunk["night_lights"] = [x['mean'] for x in stats_nl]
    chunk[["hindex", "night_lights"]].to_parquet(f"../../urbangrammar_samba/spatial_signatures/functional/night_lights/nl_{chunk_id}")

    return f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds."

In [33]:
# Keep at most `workers` chunks in flight: start one task per worker, then
# submit the next chunk each time a running task finishes.
inputs = iter(range(103))
futures = [client.submit(_night_lights, next(inputs)) for _ in range(workers)]
ac = as_completed(futures)
for finished_future in ac:
    # Top up the queue before reporting; `None` signals that every chunk has
    # already been submitted.
    upcoming = next(inputs, None)
    if upcoming is not None:
        ac.add(client.submit(_night_lights, upcoming))
    print(finished_future.result())

Chunk 3 processed sucessfully in 101.62825918197632 seconds.
Chunk 0 processed sucessfully in 108.6822566986084 seconds.
Chunk 2 processed sucessfully in 115.19350171089172 seconds.
Chunk 1 processed sucessfully in 122.15239238739014 seconds.
Chunk 7 processed sucessfully in 135.8249044418335 seconds.
Chunk 4 processed sucessfully in 151.0806565284729 seconds.
Chunk 11 processed sucessfully in 96.01190829277039 seconds.
Chunk 10 processed sucessfully in 106.74090838432312 seconds.
Chunk 5 processed sucessfully in 222.54443430900574 seconds.
Chunk 9 processed sucessfully in 118.09922075271606 seconds.
Chunk 8 processed sucessfully in 138.83542704582214 seconds.
Chunk 6 processed sucessfully in 245.05762553215027 seconds.
Chunk 13 processed sucessfully in 131.59788346290588 seconds.
Chunk 15 processed sucessfully in 100.00797629356384 seconds.
Chunk 12 processed sucessfully in 189.21026062965393 seconds.
Chunk 16 processed sucessfully in 110.26696038246155 seconds.
Chunk 17 processed suc

## Workplace population by industry

In [34]:
# Workplace population by industry; source layer for the interpolation below.
wpz = gpd.read_parquet(
    '../../urbangrammar_samba/functional_data/employment/workplace/workplace_by_industry_gb.pq'
)

In [46]:
# Columns to interpolate: everything between the first and the last column.
# NOTE(review): assumes the first column is an identifier and the last one the
# geometry — confirm against the schema of `wpz`.
# Computed once, since it does not change between chunks.
industry_columns = wpz.columns[1:-1].to_list()

for chunk_id in range(103):
    s = time()
    chunk = gpd.read_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq", columns=["hindex", "buildings"]).set_geometry("buildings")
    xmin, ymin, xmax, ymax = chunk.total_bounds
    # Only the workplace polygons within the chunk's bounding box are passed
    # as the interpolation source.
    ests = tobler.area_weighted.area_interpolate(
        wpz.cx[xmin:xmax, ymin:ymax],
        chunk,
        extensive_variables=industry_columns,
    )
    # Attach `hindex` positionally; this relies on the output rows following
    # the order of the target geometries.
    ests['hindex'] = chunk.hindex.values
    ests.drop(columns="geometry").to_parquet(f"../../urbangrammar_samba/spatial_signatures/functional/workplace/pop_{chunk_id}")
    print(f"Chunk {chunk_id} processed sucessfully in {time() - s} seconds.")

Chunk 0 processed sucessfully in 4.339667081832886 seconds.
Chunk 1 processed sucessfully in 4.707739353179932 seconds.
Chunk 2 processed sucessfully in 4.347376346588135 seconds.
Chunk 3 processed sucessfully in 3.9326553344726562 seconds.
Chunk 4 processed sucessfully in 6.005887031555176 seconds.
Chunk 5 processed sucessfully in 10.901556253433228 seconds.
Chunk 6 processed sucessfully in 10.061571836471558 seconds.
Chunk 7 processed sucessfully in 5.373244285583496 seconds.
Chunk 8 processed sucessfully in 5.54592227935791 seconds.
Chunk 9 processed sucessfully in 4.712031602859497 seconds.
Chunk 10 processed sucessfully in 4.334367275238037 seconds.
Chunk 11 processed sucessfully in 4.112551212310791 seconds.
Chunk 12 processed sucessfully in 7.6018900871276855 seconds.
Chunk 13 processed sucessfully in 5.514501094818115 seconds.
Chunk 14 processed sucessfully in 5.70710563659668 seconds.
Chunk 15 processed sucessfully in 5.131793260574341 seconds.
Chunk 16 processed sucessfully i