# Dask-ready versions of momepy algorithm

In [None]:
# !pip install git+https://github.com/jsignell/dask-geopandas.git
# !pip install git+https://github.com/geopandas/geopandas.git

In [2]:
from dask.distributed import Client, LocalCluster
import dask_geopandas as dask_geopandas
import dask.dataframe as dd
import geopandas
import pygeos
import numpy as np
import pandas as pd
import momepy



In [2]:
# Start a local cluster with 14 workers and connect a client to it;
# the bare `client` expression renders its summary.
cluster = LocalCluster(n_workers=14)
client = Client(cluster)
client

0,1
Client  Scheduler: tcp://127.0.0.1:34005  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 14  Cores: 28  Memory: 84.28 GB


In [3]:
# Read the `_0` buildings and tessellation parquet files into GeoDataFrames.
blg = geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/buildings/blg_0.pq")
tess = geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/tessellation/tess_0.pq")

In [4]:
# Give each layer its own geometry column name so both geometries can sit
# side by side after the join.
tess = tess.rename_geometry('tessellation')
blg = blg.rename_geometry('buildings')

# Left join on `uID`: every tessellation row is kept, building columns are attached where matched.
df = tess.merge(blg, how='left', on='uID')

## Buildings

In [5]:
# Wrap the merged frame as a Dask GeoDataFrame split into 14 partitions.
ddf = dask_geopandas.from_geopandas(df, npartitions=14)

Area

In [6]:
# Building area (lazy; evaluated on compute).
ddf['sdbAre'] = ddf['buildings'].area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Perimeter

In [7]:
# Building perimeter (lazy; evaluated on compute).
ddf['sdbPer'] = ddf['buildings'].length

  return geopandas.GeoDataFrame(df, crs=x.crs)


Courtyard area

In [8]:
def _exterior_area(series):
    """Area of polygons rebuilt from the exterior rings of `series` only."""
    rings = series.exterior.values.data
    return pygeos.area(pygeos.polygons(rings))


# Courtyard area = area within the exterior ring minus the building's own area.
exterior_area = ddf.buildings.map_partitions(_exterior_area, meta='float')
ddf['sdbCoA'] = exterior_area - ddf['sdbAre']

  return geopandas.GeoDataFrame(df, crs=x.crs)


Circular compactness

In [9]:
from momepy_utils import _circle_radius

def _building_hull_radius(ring):
    """Feed the ring's coordinates to `_circle_radius`; a missing ring stays None."""
    if ring is None:
        return None
    return _circle_radius(list(ring.coords))


hull = ddf.buildings.convex_hull.exterior

radius = hull.apply(_building_hull_radius, meta='float')
# Ratio of building area to the area of a circle with that radius.
circle_area = np.pi * radius ** 2
ddf['ssbCCo'] = ddf['sdbAre'] / circle_area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Corners

In [10]:
from momepy_utils import get_corners

# Apply `get_corners` to every building geometry.
ddf['ssbCor'] = ddf.buildings.apply(get_corners, meta='float')

  return geopandas.GeoDataFrame(df, crs=x.crs)


Squareness

In [11]:
from momepy_utils import squareness

# Apply `squareness` to every building geometry.
ddf['ssbSqu'] = ddf.buildings.apply(squareness, meta='float')

  return geopandas.GeoDataFrame(df, crs=x.crs)


Equivalent rectangular index

In [12]:
def _building_rotated_rectangle(geom):
    """Minimum rotated rectangle of `geom`; None passes through unchanged."""
    return None if geom is None else geom.minimum_rotated_rectangle


bbox = ddf.buildings.apply(_building_rotated_rectangle, meta=geopandas.GeoSeries())

# sqrt(area / bbox area) * (bbox perimeter / perimeter)
area_ratio = ddf['sdbAre'] / bbox.area
perimeter_ratio = bbox.length / ddf['sdbPer']
ddf['ssbERI'] = area_ratio.pow(1./2) * perimeter_ratio

  return geopandas.GeoDataFrame(df, crs=x.crs)


Elongation

In [13]:
from momepy_utils import elongation

# `elongation` receives each partition of the rotated rectangles as a whole series.
ddf['ssbElo'] = bbox.map_partitions(elongation, meta='float')

Centroid-corner distance

In [14]:
from momepy_utils import centroid_corner

def _centroid_corner(series):
    """Run `centroid_corner` on each geometry of one partition.

    Parameters
    ----------
    series : GeoSeries
        Geometries of a single partition.

    Returns
    -------
    pandas.DataFrame
        One row per geometry, indexed like ``series``; the columns hold the
        values returned by `centroid_corner` for that geometry.
    """
    per_geometry = [centroid_corner(geom) for geom in series]
    return pd.DataFrame(per_geometry, index=series.index)

# Meta tells Dask the partition result is a frame with two float columns labelled 0 and 1.
ccd_meta = pd.DataFrame({0: [0.1], 1: [1.1]})
ddf[['ssbCCM', 'ssbCCD']] = ddf.buildings.map_partitions(_centroid_corner, meta=ccd_meta)

  return geopandas.GeoDataFrame(df, crs=x.crs)


Solar orientation

In [15]:
from momepy_utils import solar_orientation_poly

# Orientation is derived from each building's rotated rectangle in `bbox`.
ddf['stbOri'] = bbox.apply(solar_orientation_poly, meta='float')

## Enclosed tessellation cells

Longest axis length

In [16]:
hull = ddf.tessellation.convex_hull.exterior

# Longest axis length is taken as twice the value `_circle_radius` returns for the hull ring.
cell_radius = hull.apply(lambda ring: _circle_radius(list(ring.coords)), meta='float')
ddf['sdcLAL'] = cell_radius * 2

  return geopandas.GeoDataFrame(df, crs=x.crs)


Area

In [17]:
# Tessellation cell area (lazy; evaluated on compute).
ddf['sdcAre'] = ddf['tessellation'].area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Circular compactness

In [18]:
hull = ddf.tessellation.convex_hull.exterior

radius = hull.apply(lambda ring: _circle_radius(list(ring.coords)), meta='float')
# Ratio of cell area to the area of a circle with that radius.
circle_area = np.pi * radius ** 2
ddf['sscCCo'] = ddf['sdcAre'] / circle_area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Equivalent rectangular index

In [19]:
bbox = ddf.tessellation.apply(lambda cell: cell.minimum_rotated_rectangle, meta=geopandas.GeoSeries())

# sqrt(area / bbox area) * (bbox perimeter / perimeter)
area_ratio = ddf['sdcAre'] / bbox.area
perimeter_ratio = bbox.length / ddf.tessellation.length
ddf['sscERI'] = area_ratio.pow(1./2) * perimeter_ratio

  return geopandas.GeoDataFrame(df, crs=x.crs)


Solar orientation

In [20]:
# Orientation is derived from each cell's rotated rectangle in `bbox`.
ddf['stcOri'] = bbox.apply(solar_orientation_poly, meta='float')

Coverage area ratio

In [21]:
# Share of each cell's area taken up by its building.
ddf['sicCAR'] = ddf['sdbAre'].div(ddf['sdcAre'])

Cell alignment

In [22]:
# Cell alignment is the absolute deviation between building and cell
# orientation, |stbOri - stcOri|. Dividing the two orientations instead
# yields a ratio and breaks down when the cell orientation is 0.
ddf['stbCeA'] = (ddf['stbOri'] - ddf['stcOri']).abs()

In [23]:
# Evaluate the lazy Dask frame and collect the result into `df`.
%time df = ddf.compute()

CPU times: user 6.65 s, sys: 924 ms, total: 7.58 s
Wall time: 43.2 s


In [23]:
client.close()

Contextual characters are not efficient in dask as there's a lot of between-chunk communication required.

In [5]:
from libpysal.weights import Queen

In [6]:
%%time
# make 3d geometry 2d
coords = pygeos.get_coordinates(df.tessellation.values.data)
counts = pygeos.get_num_coordinates(df.tessellation.values.data)
df['tessellation'] = geopandas.GeoSeries([pygeos.polygons(c) for c in np.split(coords, np.cumsum(counts)[:-1])], crs=df.tessellation.crs)

CPU times: user 2.28 s, sys: 79 ms, total: 2.36 s
Wall time: 2.33 s


Unique IDs are not actually unique when buildings significantly cross enclosure boundaries. We therefore use the positional index within the weights matrix and store it as a column for `apply`. Make sure the index is a `RangeIndex` so we can use `iloc`, which is faster than `loc`.

In [6]:
%time w = Queen.from_dataframe(df, geom_col='tessellation')

df['ix'] = range(len(df))

 There are 10 disconnected components.
 There is 1 island with id: 111844.


CPU times: user 42.7 s, sys: 839 ms, total: 43.6 s
Wall time: 43.6 s


Sample contextual implementation.

In [77]:
def get_mean(x, col):
    """Mean of ``df[col]`` over row ``x`` and its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w.neighbors``).
    col : str
        Name of the column to average.
    """
    positions = [x, *w.neighbors[x]]
    return df[col].iloc[positions].mean()

In [67]:
%time df['sdbAre'] = df.buildings.area

CPU times: user 24.3 ms, sys: 3 µs, total: 24.3 ms
Wall time: 22.6 ms


Alignment

In [27]:
def alignment(x, orientation):
    """Mean absolute difference between row ``x`` and its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w.neighbors``).
    orientation : str
        Name of the column in ``df`` holding the values to compare.
    """
    values = df[orientation]
    deviation = abs(values.iloc[w.neighbors[x]] - values.iloc[x])
    return deviation.mean()

In [28]:
%time df['mtbAli'] = df.ix.apply(alignment, args=('stbOri',))

CPU times: user 1min 1s, sys: 4.68 s, total: 1min 6s
Wall time: 58.8 s


Mean distance to neighbouring buildings

In [35]:
def neighbor_distance(x):
    """Mean distance from building ``x`` to the buildings of its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w.neighbors``).

    Returns
    -------
    float
        Mean distance, or NaN when the focal row has no building geometry.
    """
    buildings = df.buildings
    geom = buildings.iloc[x]
    if geom is None:
        return np.nan
    return buildings.iloc[w.neighbors[x]].distance(geom).mean()

In [36]:
%time df['mtbNDi'] = df.ix.apply(neighbor_distance)

CPU times: user 1min 7s, sys: 8.99 s, total: 1min 16s
Wall time: 1min 5s


Weighted neighbours of cells

In [37]:
%time df['mtcWNe'] = df.ix.apply(lambda x: w.cardinalities[x]) / df.tessellation.length

CPU times: user 202 ms, sys: 3.53 ms, total: 205 ms
Wall time: 198 ms


Area covered by neighbourhood

In [39]:
def area_covered(x, area):
    """Sum of ``df[area]`` over row ``x`` and its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w.neighbors``).
    area : str
        Name of the column to sum.
    """
    positions = [x, *w.neighbors[x]]
    return df[area].iloc[positions].sum()

In [40]:
%time df['mdcAre'] = df.ix.apply(area_covered, args=('sdcAre',))

CPU times: user 26.9 s, sys: 2.64 s, total: 29.5 s
Wall time: 26.2 s


For some characters, we need larger topological distance.

In [7]:
# Build a second weights object from `w` via `momepy.sw_high` with k=3.
%time w3 = momepy.sw_high(k=3, weights=w)

CPU times: user 11.1 s, sys: 284 ms, total: 11.4 s
Wall time: 11.4 s


In [8]:
from tqdm.notebook import tqdm

tqdm.pandas()

  from pandas import Panel


Mean interbuilding distance

In [9]:
%%time
# define adjacency list from lipysal
adj_list = w.to_adjlist(remove_symmetric=True)
adj_list["distance"] = (
    df.buildings.iloc[adj_list.focal]
    .reset_index(drop=True)
    .distance(df.buildings.iloc[adj_list.neighbor].reset_index(drop=True))
)
adj_list = adj_list.set_index(['focal', 'neighbor'])

  "{} islands in this weights matrix. Conversion to an "


CPU times: user 3.13 s, sys: 8 ms, total: 3.13 s
Wall time: 3.13 s


In [13]:
adj_list.iloc[:20].distance.to_dict()

{(0, 133): 16.44838596490981,
 (0, 6): 17.13828018908813,
 (0, 9): 5.357301937154851,
 (0, 10): 11.168542354088293,
 (0, 12): 17.509621315314806,
 (0, 93): 23.44859270834423,
 (1, 2): 10.436261030283655,
 (1, 4): 7.0933605932052135,
 (1, 66): 17.375177141846365,
 (1, 119): 18.284118792004595,
 (2, 66): 12.555773884329465,
 (2, 3): 10.106636433578837,
 (2, 4): 31.941526907654914,
 (3, 4): 203.8542194804782,
 (3, 5): 30.143941400049492,
 (3, 486): 19.639473007188133,
 (3, 68): 24.899722812722125,
 (4, 119): 5.508542855640245,
 (4, 5): 10.186681441097985,
 (4, 134): 189.0290689285774}

In [11]:
def mean_interbuilding_distance(x):
    """Mean of the stored edge distances among row ``x`` and its ``w3`` neighbours.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w3.neighbors``).

    Returns
    -------
    float
        Mean ``distance`` over the ``adj_list`` entries whose focal and
        neighbor labels are both selected by that set of rows.
    """
    members = [x, *w3.neighbors[x]]
    pairwise = adj_list.distance.loc[members, members]
    return pairwise.mean()

In [12]:
%time df['ltbIBD'] = df.ix.progress_apply(mean_interbuilding_distance)

HBox(children=(FloatProgress(value=0.0, max=114653.0), HTML(value='')))




KeyboardInterrupt: 

Weighted reached enclosures

In [55]:
def weighted_reached_enclosures(x, area, enclosure_id):
    """Distinct enclosures around row ``x`` divided by the total area of that vicinity.

    Parameters
    ----------
    x : int
        Positional index of the focal row (also its key in ``w3.neighbors``).
    area : str
        Name of the column in ``df`` holding areas.
    enclosure_id : str
        Name of the column in ``df`` holding enclosure identifiers.
    """
    members = [x, *w3.neighbors[x]]
    vicinity = df[[area, enclosure_id]].iloc[members]

    n_enclosures = len(vicinity[enclosure_id].unique())
    return n_enclosures / vicinity[area].sum()

In [66]:
%time df['ltcWRE'] = df.ix.apply(weighted_reached_enclosures, args=('sdcAre', 'enclosureID'))

CPU times: user 3min 22s, sys: 13.8 s, total: 3min 36s
Wall time: 3min 10s


## Enclosures

In [8]:
# Open every file matching `encl_*.pq` as one lazy Dask GeoDataFrame.
encl = dask_geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/enclosures/encl_*.pq")

In [5]:
encl

Unnamed: 0_level_0,geometry,enclosureID
npartitions=103,Unnamed: 1_level_1,Unnamed: 2_level_1
,geometry,int64
,...,...
...,...,...
,...,...
,...,...


In [9]:
# Area and perimeter of each enclosure.
enclosure_geometry = encl.geometry
encl['ldeAre'] = enclosure_geometry.area
encl['ldePer'] = enclosure_geometry.length

In [7]:
from momepy_utils import _circle_radius

def _enclosure_hull_radius(ring):
    """Feed the ring's coordinates to `_circle_radius`; a missing ring stays None."""
    if ring is None:
        return None
    return _circle_radius(list(ring.coords))


hull = encl.geometry.convex_hull.exterior

radius = hull.apply(_enclosure_hull_radius, meta='float')
# Ratio of enclosure area to the area of a circle with that radius.
circle_area = np.pi * radius ** 2
encl['lseCCo'] = encl['ldeAre'] / circle_area

In [8]:
def _enclosure_rotated_rectangle(geom):
    """Minimum rotated rectangle of `geom`; None passes through unchanged."""
    return None if geom is None else geom.minimum_rotated_rectangle


bbox = encl.geometry.apply(_enclosure_rotated_rectangle, meta=geopandas.GeoSeries())

# sqrt(area / bbox area) * (bbox perimeter / perimeter)
area_ratio = encl['ldeAre'] / bbox.area
perimeter_ratio = bbox.length / encl['ldePer']
encl['lseERI'] = area_ratio.pow(1./2) * perimeter_ratio

In [9]:
# Longest axis is taken as twice the value `_circle_radius` returns for the hull ring.
hull_radius = hull.apply(lambda ring: _circle_radius(list(ring.coords)), meta='float')
longest_axis = hull_radius * 2

# Axis length weighted by 4/pi - 16 * area / perimeter^2.
compactness_term = (4 / np.pi) - (16 * encl['ldeAre']) / (encl['ldePer'] ** 2)
encl['lseCWA'] = longest_axis * compactness_term

In [10]:
from momepy_utils import solar_orientation_poly

# Orientation is derived from each enclosure's rotated rectangle in `bbox`.
encl['lteOri'] = bbox.apply(solar_orientation_poly, meta='float')

In [10]:
# Evaluate the lazy Dask frame and collect the result into `encl_df`.
%time encl_df = encl.compute()

CPU times: user 6.34 s, sys: 1.35 s, total: 7.68 s
Wall time: 9.28 s


In [11]:
encl_df

Unnamed: 0,geometry,enclosureID,ldeAre,ldePer
127225,"POLYGON ((336215.000 427085.000, 336262.000 42...",109777,12611.387500,451.087222
127218,"POLYGON ((336439.800 427008.030, 336437.740 42...",109770,3200.063800,226.380464
127217,"POLYGON ((336434.000 427089.000, 336434.840 42...",109769,10837.702700,433.243236
127237,"POLYGON ((335693.000 427179.000, 335732.000 42...",109789,19370.910000,657.491255
127238,"POLYGON ((335781.000 426928.000, 335776.000 42...",109790,17780.420000,639.617669
...,...,...,...,...
82486,"POLYGON Z ((411338.735 399883.566 0.000, 41132...",634854,19878.400766,1304.939638
82485,"POLYGON Z ((410757.604 399739.248 0.000, 41072...",634853,26141.296973,1320.585561
82479,"POLYGON Z ((408888.810 399811.300 0.000, 40877...",634847,14608.806297,706.295760
82482,"POLYGON Z ((409482.740 399752.760 0.000, 40939...",634850,206576.769136,4305.068440


In [15]:
%%time
# make 3d geometry 2d
coords = pygeos.get_coordinates(encl_df.geometry.values.data)
counts = pygeos.get_num_coordinates(encl_df.geometry.values.data)
encl_df['geometry'] = geopandas.GeoSeries([pygeos.polygons(c) for c in np.split(coords, np.cumsum(counts)[:-1])], crs=encl_df.crs)

CPU times: user 13.1 s, sys: 209 ms, total: 13.3 s
Wall time: 13.1 s


Number of neighbours (weighted)

In [16]:
# Query the spatial index with every enclosure geometry at once ('intersects' predicate);
# `inp` and `res` are the two arrays returned by the bulk query.
%time inp, res = encl_df.sindex.query_bulk(encl_df.geometry, predicate='intersects')

CPU times: user 1min 34s, sys: 455 ms, total: 1min 35s
Wall time: 1min 33s


In [17]:
# Distinct values in `inp` and how many times each occurs.
%time indices, counts = np.unique(inp, return_counts=True)

CPU times: user 221 ms, sys: 1.36 ms, total: 222 ms
Wall time: 192 ms


In [21]:
# One is subtracted from every count - presumably to discount each geometry
# intersecting itself (TODO confirm). The array is assigned by position, so it
# must have exactly one entry per row of encl_df.
%time encl_df['neighbors'] = counts - 1

CPU times: user 9.51 ms, sys: 0 ns, total: 9.51 ms
Wall time: 5.87 ms


In [22]:
%time encl_df['lteWNB'] = encl_df['neighbors'] / encl_df['ldePer']

CPU times: user 15.3 ms, sys: 0 ns, total: 15.3 ms
Wall time: 8.54 ms


Weighted number of cells

In [5]:
tess = dd.read_parquet("../../urbangrammar_samba/spatial_signatures/tessellation/tess_*.pq")
%time encl_counts = tess.groupby('enclosureID').count().compute()
merged = encl_df[['enclosureID', 'ldeAre']].merge(encl_counts[['geometry']], how='left', on='enclosureID')
encl_df['lieWCe'] = merged['geometry'] / merged['ldeAre']

In [27]:
client.close()