In [1]:
import tools
import geopandas
import contextily
import xarray, rioxarray
import numpy
import pandas
import pyogrio
import pygeos
from shapely.geometry import box
from dask.distributed import Client, LocalCluster
import dask.bag
import dask.dataframe
from itertools import product
import dask_geopandas

from dask_geopandas.hilbert_distance import _hilbert_distance

import warnings

In [2]:
# Start a local Dask cluster with 16 workers (one thread each) and attach a
# client to it; the trailing expression renders the client summary.
client = Client(LocalCluster(n_workers=16, threads_per_worker=1))
client

In [3]:
# Run configuration shared by the cells below.
specs = dict(
    # Spacing of the point grid and side length of the buffered chip squares.
    chip_size=32,
    bands=[1, 2, 3],  # RGB
    # Raster mosaic opened with rioxarray.
    mosaic_p="/home/jovyan/work/urbangrammar_samba/ghs_composite_s2/GHS-composite-S2.vrt",
    # Spatial signature polygons read with pyogrio.
    spsig_p="/home/jovyan/work/urbangrammar_samba/spatial_signatures/signatures/signatures_combined_levels_simplified.gpkg",
    # Temporary Parquet dataset holding the grid points.
    points_temp="/home/jovyan/work/chips_gb/temp/points/",
    folder="/home/jovyan/work/chips_gb/32/",
)

In [27]:
%%time
# Read the spatial signatures layer from the GeoPackage at specs['spsig_p'].
spsig = pyogrio.read_dataframe(specs['spsig_p'])

In [5]:
# Open the raster mosaic in chunks of 1024 along both x and y.
mosaic = rioxarray.open_rasterio(specs["mosaic_p"], chunks=dict(x=1024, y=1024))

In [6]:
# Display the chunked mosaic to inspect its dimensions and coordinates.
mosaic

In [7]:
# Extent of the mosaic, taken from the extremes of its x and y coordinates.
start_x, end_x = float(mosaic["x"].min()), float(mosaic["x"].max())
start_y, end_y = float(mosaic["y"].min()), float(mosaic["y"].max())

In [8]:
# Evenly spaced coordinates along each axis, `chip_size` apart.
# numpy.arange excludes the end value itself.
x_coords, y_coords = (
    numpy.arange(lower, upper, specs["chip_size"])
    for lower, upper in ((start_x, end_x), (start_y, end_y))
)

In [9]:
# One bag item per coordinate value. Only the first half of the y coordinates
# is used, so the grid built from these bags covers half of the y range.
x_bag = dask.bag.from_sequence(x_coords)
y_bag = dask.bag.from_sequence(y_coords[: len(y_coords) // 2])

In [10]:
# Cartesian product of the two bags: one (x, y) pair per grid point.
product_bag = x_bag.product(y_bag)

In [11]:
# Turn the (x, y) pairs into a Dask dataframe with two float columns.
ddf = product_bag.to_dataframe(meta=dict(x=float, y=float))

In [None]:
%%time
# Write the coordinate grid to Parquet so it can be re-read from disk.
ddf.to_parquet("/home/jovyan/work/chips_gb/temp/coords/")

In [14]:
# Load the coordinate grid back from its Parquet dataset.
ddf = dask.dataframe.read_parquet("/home/jovyan/work/chips_gb/temp/coords/")

In [15]:
# Build point geometries from the x/y columns, tagged with CRS 27700
# (EPSG:27700, British National Grid).
ddf["geometry"] = dask_geopandas.points_from_xy(ddf, "x", "y", crs=27700)

In [16]:
# Convert the Dask dataframe into a dask-geopandas GeoDataFrame and set its CRS to 27700.
gddf = dask_geopandas.from_dask_dataframe(ddf).set_crs(27700)

In [None]:
# Persist the point grid to specs["points_temp"], overwriting any existing output there.
gddf.to_parquet(specs["points_temp"], overwrite=True)

In [5]:
# Keep the signature type and geometry, indexed by each polygon's Hilbert
# distance within the layer's total bounds.
# NOTE(review): `_hilbert_distance` is a private dask_geopandas helper and `p`
# is presumably the curve precision — confirm against the installed version.
signatures = spsig[["signature_type", "geometry"]].set_index(
    _hilbert_distance(spsig, spsig.total_bounds, p=10)
)

In [6]:
# Sort by the Hilbert index, split into 1000 partitions, then compute the
# spatial partitions of the resulting dask-geopandas frame.
signatures = dask_geopandas.from_geopandas(
    signatures.sort_index(),
    npartitions=1000,
)
signatures.calculate_spatial_partitions()

In [7]:
# Read the point grid back from the temporary Parquet dataset.
points = dask_geopandas.read_parquet(specs["points_temp"])

In [8]:
# Keep only grid points that fall within a signature polygon; the inner join
# also attaches the matching polygon's columns to each point.
# NOTE(review): newer geopandas releases deprecate `op` in favour of `predicate`;
# confirm which keyword the installed dask_geopandas.sjoin accepts before switching.
points_within = dask_geopandas.sjoin(points, signatures, how="inner", op='within')

In [None]:
%%time
# Ignore warnings whose message matches the "initial implementation of Parquet"
# pattern, then write the joined points out in 1000 partitions.
warnings.filterwarnings(
    "ignore",
    message=".*initial implementation of Parquet.*",
)
(
    points_within
    .repartition(npartitions=1000)
    .to_parquet("/home/jovyan/work/chips_gb/temp/points_within/")
)

In [10]:
# Restart the cluster's workers before the next stage
# (presumably to clear worker memory — confirm).
client.restart()

In [4]:
# Re-load only the geometry column of the joined points using plain Dask.
points_within = dask.dataframe.read_parquet(
    "/home/jovyan/work/chips_gb/temp/points_within/",
    columns=["geometry"],
)

In [5]:
# Decode the geometry column from WKB into a GeoSeries, one partition at a
# time; the empty GeoSeries tells Dask the output type.
points_within["geometry"] = points_within["geometry"].map_partitions(
    geopandas.GeoSeries.from_wkb,
    meta=geopandas.GeoSeries(),
)

In [6]:
# Convert the Dask dataframe (with its decoded geometry column) into a dask-geopandas GeoDataFrame.
points_within = dask_geopandas.from_dask_dataframe(points_within)

In [7]:
# Buffer every point by half the chip size; cap_style=3 is shapely's square cap,
# so each point becomes a square with side `chip_size` centred on it.
polygons = points_within.buffer(specs['chip_size'] / 2, cap_style=3)

In [8]:
# Replace the point geometries with the buffered chip squares.
points_within["geometry"] = polygons

In [9]:
# Re-split the data, requesting a partition size of 5MB.
points_within = points_within.repartition(partition_size="5MB")

In [None]:
# Write the chip polygons to Parquet, overwriting any existing output in that folder.
points_within.to_parquet("/home/jovyan/work/chips_gb/temp/polygons/", overwrite=True)

In [29]:
# Rebuild the Hilbert-indexed signature table from the in-memory layer:
# signature type and geometry, indexed by Hilbert distance within the
# layer's total bounds.
# NOTE(review): `_hilbert_distance` is a private dask_geopandas helper and `p`
# is presumably the curve precision — confirm against the installed version.
signatures = spsig[["signature_type", "geometry"]].set_index(
    _hilbert_distance(spsig, spsig.total_bounds, p=10)
)

In [None]:
# Sort by the Hilbert index, split into 1000 partitions and persist the
# partitioned signatures to Parquet.
signatures = dask_geopandas.from_geopandas(
    signatures.sort_index(),
    npartitions=1000,
)
signatures.to_parquet("/home/jovyan/work/chips_gb/temp/signatures")

In [45]:
# Restart the cluster's workers before the final join
# (presumably to clear worker memory — confirm).
client.restart()

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 11
Total threads: 11,Total memory: 86.31 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35077,Workers: 11
Dashboard: http://127.0.0.1:8787/status,Total threads: 11
Started: 48 minutes ago,Total memory: 86.31 GiB

0,1
Comm: tcp://172.17.0.3:35605,Total threads: 1
Dashboard: http://172.17.0.3:41173/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:33553,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-r1oidwj3,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-r1oidwj3

0,1
Comm: tcp://172.17.0.3:34867,Total threads: 1
Dashboard: http://172.17.0.3:40513/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:44097,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-0k0ma7qd,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-0k0ma7qd

0,1
Comm: tcp://172.17.0.3:37799,Total threads: 1
Dashboard: http://172.17.0.3:46695/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:34815,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-_vc057kx,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-_vc057kx

0,1
Comm: tcp://172.17.0.3:43779,Total threads: 1
Dashboard: http://172.17.0.3:38077/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:35167,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-zl_o7b0u,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-zl_o7b0u

0,1
Comm: tcp://172.17.0.3:40329,Total threads: 1
Dashboard: http://172.17.0.3:37455/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:44681,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-c58yszvd,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-c58yszvd

0,1
Comm: tcp://172.17.0.3:43029,Total threads: 1
Dashboard: http://172.17.0.3:33795/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:34985,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-fsx683vz,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-fsx683vz

0,1
Comm: tcp://172.17.0.3:44999,Total threads: 1
Dashboard: http://172.17.0.3:36693/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:34821,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-ea44nife,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-ea44nife

0,1
Comm: tcp://172.17.0.3:34067,Total threads: 1
Dashboard: http://172.17.0.3:35021/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:37935,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-wmaonrse,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-wmaonrse

0,1
Comm: tcp://172.17.0.3:33417,Total threads: 1
Dashboard: http://172.17.0.3:36959/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:46865,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-niixz_qj,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-niixz_qj

0,1
Comm: tcp://172.17.0.3:44975,Total threads: 1
Dashboard: http://172.17.0.3:36503/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:32845,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-7vy9m5aq,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-7vy9m5aq

0,1
Comm: tcp://172.17.0.3:36619,Total threads: 1
Dashboard: http://172.17.0.3:46191/status,Memory: 7.85 GiB
Nanny: tcp://127.0.0.1:33535,
Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-zif86z9p,Local directory: /home/jovyan/work/signature_ai/dask-worker-space/worker-zif86z9p


In [49]:
# Load the chip polygons (setting their CRS to 27700) and the partitioned
# signatures from their Parquet datasets.
polygons = (
    dask_geopandas
    .read_parquet("/home/jovyan/work/chips_gb/temp/polygons/")
    .set_crs(27700)
)
signatures = dask_geopandas.read_parquet(
    "/home/jovyan/work/chips_gb/temp/signatures/"
)

In [43]:
# Keep only chip polygons lying within a signature polygon and attach that
# polygon's signature_type. The right-hand side is the in-memory `spsig`
# frame, not the Parquet-backed `signatures` frame.
# NOTE(review): newer geopandas releases deprecate `op` in favour of `predicate`;
# confirm which keyword the installed dask_geopandas.sjoin accepts before switching.
polygons_within = dask_geopandas.sjoin(polygons, spsig[['signature_type', 'geometry']], op="within")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [44]:
# Persist the chips that passed the within-signature filter as the chip bounds dataset.
polygons_within.to_parquet("/home/jovyan/work/chips_gb/temp/chip_bounds/")


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  result = function(*args, **kwargs)

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  result = function(*args, **kwargs)

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  result = function(*args, **kwargs)

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  result = function(*args, **kwargs)

This metadata specification does not yet make stability promises.  We do not yet recommend using this i

In [46]:
# Re-open the saved chip bounds and count the rows in every partition.
chip_bds = dask_geopandas.read_parquet("/home/jovyan/work/chips_gb/temp/chip_bounds/")
lens = chip_bds.map_partitions(len).compute()


In [48]:
# Total number of chips kept after the within-signature filter.
lens.sum()

167369613

In [50]:
# Count the candidate chip polygons before filtering, for comparison with
# the number of chips that were kept.
lens2 = polygons.map_partitions(len).compute()
lens2.sum()

171810151

In [54]:
# Number of chips per signature type code, computed across all partitions.
chips_per_type = chip_bds.signature_type.value_counts().compute()

In [56]:
# Human-readable name for every signature type code. The four codes at the
# end all share the single label "outlier".
types = {
    "0_0": "Countryside agriculture",
    "1_0": "Accessible suburbia",
    "3_0": "Open sprawl",
    "4_0": "Wild countryside",
    "5_0": "Warehouse/Park land",
    "6_0": "Gridded residential quarters",
    "7_0": "Urban buffer",
    "8_0": "Disconnected suburbia",
    "2_0": "Dense residential neighbourhoods",
    "2_1": "Connected residential neighbourhoods",
    "2_2": "Dense urban neighbourhoods",
    "9_0": "Local urbanity",
    "9_1": "Concentrated urbanity",
    "9_2": "Regional urbanity",
    "9_4": "Metropolitan urbanity",
    "9_5": "Hyper concentrated urbanity",
    **dict.fromkeys(("9_3", "9_6", "9_7", "9_8"), "outlier"),
}

In [57]:
# Swap the signature type codes in the index for their descriptive names.
# `rename` returns a new Series and leaves labels that have no entry in `types`
# untouched, so re-running this cell is a no-op. Assigning `index.map(types)`
# in place would turn the already-translated labels into NaN on a second run.
chips_per_type = chips_per_type.rename(index=types)

In [58]:
# Show the chip counts per signature type.
chips_per_type

Countryside agriculture                 84711427
Wild countryside                        43420184
Urban buffer                            28402968
Open sprawl                              4202474
Warehouse/Park land                      2102218
Accessible suburbia                      1862984
Dense residential neighbourhoods          737985
Disconnected suburbia                     538468
Dense urban neighbourhoods                471762
Connected residential neighbourhoods      424197
Gridded residential quarters              210814
Local urbanity                            193718
Regional urbanity                          66530
Metropolitan urbanity                      14084
Concentrated urbanity                       7044
Hyper concentrated urbanity                 1979
outlier                                      365
outlier                                      289
outlier                                       94
outlier                                       29
Name: signature_type