# Dask-ready versions of momepy algorithms

In [None]:
# Optional: install the development versions of dask-geopandas and geopandas.
# The https transport is used because GitHub has retired unauthenticated git://.
# !pip install git+https://github.com/jsignell/dask-geopandas.git
# !pip install git+https://github.com/geopandas/geopandas.git

In [46]:
from dask.distributed import Client, LocalCluster
import dask_geopandas as dask_geopandas
import geopandas
import pygeos
import numpy as np
import pandas as pd
import momepy

In [2]:
client = Client(LocalCluster(n_workers=14))
client

0,1
Client  Scheduler: tcp://127.0.0.1:37271  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 14  Cores: 28  Memory: 84.28 GB


In [3]:
blg = geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/buildings/blg_0.pq")
tess = geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/tessellation/tess_0.pq")

In [4]:
# Give each geometry column a distinct name so both survive the merge below.
blg = blg.rename_geometry('buildings')
tess = tess.rename_geometry('tessellation')

# Left join on `uID`: every tessellation row is kept; rows without a matching
# building end up with a missing value in the `buildings` column.
df = tess.merge(blg, on='uID', how='left')

## Buildings

In [5]:
ddf = dask_geopandas.from_geopandas(df, npartitions=14)

Area

In [6]:
ddf['sdbAre'] = ddf.buildings.area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Perimeter

In [7]:
ddf['sdbPer'] = ddf.buildings.length

  return geopandas.GeoDataFrame(df, crs=x.crs)


Courtyard area

In [8]:
def _outer_ring_area(series):
    """Return the area of the polygons rebuilt from each geometry's exterior ring."""
    outer_rings = series.exterior.values.data
    return pygeos.area(pygeos.polygons(outer_rings))


# Courtyard area: area enclosed by the outer ring minus the building's own area.
exterior_area = ddf.buildings.map_partitions(_outer_ring_area, meta='float')
ddf['sdbCoA'] = exterior_area - ddf['sdbAre']

  return geopandas.GeoDataFrame(df, crs=x.crs)


Circular compactness

In [9]:
from momepy_utils import _circle_radius

# Exterior ring of each building's convex hull.
hull = ddf.buildings.convex_hull.exterior

# One radius per building, derived from the hull coordinates; rows without a
# geometry yield None instead of failing.
# NOTE(review): `_circle_radius` is presumably the radius of the minimum
# enclosing circle - confirm in momepy_utils.
radius = hull.apply(lambda g: _circle_radius(list(g.coords)) if g is not None else None, meta='float')
# Ratio of the building area to the area of a circle with that radius.
ddf['ssbCCo'] = ddf['sdbAre'] / (np.pi * radius ** 2)

  return geopandas.GeoDataFrame(df, crs=x.crs)


Corners

In [10]:
from momepy_utils import get_corners

ddf['ssbCor'] = ddf.buildings.apply(lambda g: get_corners(g), meta='float')

  return geopandas.GeoDataFrame(df, crs=x.crs)


Squareness

In [11]:
from momepy_utils import squareness

ddf['ssbSqu'] = ddf.buildings.apply(lambda g: squareness(g), meta='float')

  return geopandas.GeoDataFrame(df, crs=x.crs)


Equivalent rectangular index

In [12]:
# Minimum rotated rectangle of each building; rows without a geometry stay None.
bbox = ddf.buildings.apply(lambda g: g.minimum_rotated_rectangle if g is not None else None, meta=geopandas.GeoSeries())
# sqrt(building area / rectangle area) * (rectangle perimeter / building perimeter)
ddf['ssbERI'] = (ddf['sdbAre'] / bbox.area).pow(1./2) * (bbox.length / ddf['sdbPer'])

  return geopandas.GeoDataFrame(df, crs=x.crs)


Elongation

In [13]:
from momepy_utils import elongation

ddf['ssbElo'] = bbox.map_partitions(lambda s: elongation(s), meta='float')

Centroid-corner distance

In [14]:
from momepy_utils import centroid_corner

def _centroid_corner(series):
    ccd = series.apply(lambda g: centroid_corner(g))
    return pd.DataFrame(ccd.to_list(), index=series.index)

ddf[['ssbCCM', 'ssbCCD']] = ddf.buildings.map_partitions(_centroid_corner, meta=pd.DataFrame({0: [0.1], 1: [1.1]}))

  return geopandas.GeoDataFrame(df, crs=x.crs)


Solar orientation

In [15]:
from momepy_utils import solar_orientation_poly

ddf['stbOri'] = bbox.apply(lambda g: solar_orientation_poly(g), meta='float')

## Enclosed tessellation cells

Longest axis length

In [16]:
hull = ddf.tessellation.convex_hull.exterior

ddf['sdcLAL'] = hull.apply(lambda g: _circle_radius(list(g.coords)), meta='float') * 2

  return geopandas.GeoDataFrame(df, crs=x.crs)


Area

In [17]:
ddf['sdcAre'] = ddf.tessellation.area

  return geopandas.GeoDataFrame(df, crs=x.crs)


Circular compactness

In [18]:
hull = ddf.tessellation.convex_hull.exterior

radius = hull.apply(lambda g: _circle_radius(list(g.coords)), meta='float')
ddf['sscCCo'] = ddf['sdcAre'] / (np.pi * radius ** 2)

  return geopandas.GeoDataFrame(df, crs=x.crs)


Equivalent rectangular index

In [19]:
bbox = ddf.tessellation.apply(lambda g: g.minimum_rotated_rectangle, meta=geopandas.GeoSeries())
ddf['sscERI'] = (ddf['sdcAre'] / bbox.area).pow(1./2) * (bbox.length / ddf.tessellation.length)

  return geopandas.GeoDataFrame(df, crs=x.crs)


Solar orientation

In [20]:
ddf['stcOri'] = bbox.apply(lambda g: solar_orientation_poly(g), meta='float')

Coverage area ratio

In [21]:
ddf['sicCAR'] = ddf['sdbAre'] / ddf['sdcAre']

Cell alignment

In [22]:
# Cell alignment: absolute deviation between the orientation of a building
# (`stbOri`) and the orientation of its tessellation cell (`stcOri`).
# momepy's CellAlignment is the absolute *difference* of the two orientations;
# a ratio is unbounded and undefined whenever the cell orientation is 0.
ddf['stbCeA'] = (ddf['stbOri'] - ddf['stcOri']).abs()

In [23]:
%time df = ddf.compute()

CPU times: user 6.65 s, sys: 924 ms, total: 7.58 s
Wall time: 43.2 s


In [92]:
client.close()

Contextual characters are not efficient to compute in dask because they require a lot of communication between chunks, so the remaining steps work on the in-memory data frame returned by `compute()`.

In [24]:
from libpysal.weights import Queen

In [25]:
%%time
# make 3d geometry 2d
# `get_coordinates` returns one flat array with the x/y coordinates of all
# geometries (z is not included by default); `get_num_coordinates` gives the
# number of coordinates per geometry, used below to split the flat array again.
coords = pygeos.get_coordinates(df.tessellation.values.data)
counts = pygeos.get_num_coordinates(df.tessellation.values.data)
# NOTE(review): all coordinates of a geometry are passed to `pygeos.polygons`
# as a single ring, and the new GeoSeries gets a default index - this assumes
# single-part polygons without interior rings and a default RangeIndex on `df`;
# confirm both hold for the tessellation.
df['tessellation'] = geopandas.GeoSeries([pygeos.polygons(c) for c in np.split(coords, np.cumsum(counts)[:-1])], crs=df.tessellation.crs)

CPU times: user 2.1 s, sys: 117 ms, total: 2.22 s
Wall time: 2.18 s


There's a slight issue: the unique IDs are not actually unique if there are buildings significantly crossing the enclosure boundaries. So we use the positional index within the weights matrix and store it as a column (`ix`) to pass to `apply`. Make sure that the index is a `RangeIndex` so we can use `iloc`, which is faster than `loc`.

In [26]:
# Queen contiguity weights built from the `tessellation` geometry column.
%time w = Queen.from_dataframe(df, geom_col='tessellation')

# Position of every row, stored as a column so it can be fed to `apply` and
# used with `iloc` / the weights lookups in the functions below.
df['ix'] = range(len(df))

 There are 10 disconnected components.
 There is 1 island with id: 111844.


CPU times: user 56.6 s, sys: 5.46 s, total: 1min 2s
Wall time: 56.3 s


Sample contextual implementation.

In [77]:
def get_mean(x, col):
    """Mean of ``df[col]`` over row ``x`` and its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Position of the focal row; also the key looked up in ``w.neighbors``.
    col : str
        Name of the column to average.

    Returns
    -------
    float
        Mean of the column over the focal row and its neighbours.
    """
    positions = [x, *w.neighbors[x]]
    values = df[col].iloc[positions]
    return values.mean()

In [67]:
%time df['sdbAre'] = df.buildings.area

CPU times: user 24.3 ms, sys: 3 µs, total: 24.3 ms
Wall time: 22.6 ms


Alignment

In [27]:
def alignment(x, orientation):
    """Mean absolute deviation between the neighbours' values and the focal value.

    Parameters
    ----------
    x : int
        Position of the focal row; also the key looked up in ``w.neighbors``.
    orientation : str
        Name of the column holding the orientation values.

    Returns
    -------
    float
        Mean of ``|neighbour - focal|`` over the neighbours of ``x`` in ``w``
        (NaN when ``x`` has no neighbours).
    """
    values = df[orientation]
    deviations = abs(values.iloc[w.neighbors[x]] - values.iloc[x])
    return deviations.mean()

In [28]:
%time df['mtbAli'] = df.ix.apply(alignment, args=('stbOri',))

CPU times: user 1min 1s, sys: 4.68 s, total: 1min 6s
Wall time: 58.8 s


Mean distance to neighbouring buildings

In [35]:
def neighbor_distance(x):
    """Mean distance from the building at ``x`` to its neighbours' buildings.

    Parameters
    ----------
    x : int
        Position of the focal row; also the key looked up in ``w.neighbors``.

    Returns
    -------
    float
        Mean of the distances between the focal building and the buildings of
        its neighbours in ``w``; NaN when the focal row has no building.
    """
    buildings = df.buildings
    geom = buildings.iloc[x]
    if geom is None:
        # Rows without a building have nothing to measure from.
        return np.nan
    nearby = buildings.iloc[w.neighbors[x]]
    return nearby.distance(geom).mean()

In [36]:
%time df['mtbNDi'] = df.ix.apply(neighbor_distance)

CPU times: user 1min 7s, sys: 8.99 s, total: 1min 16s
Wall time: 1min 5s


Weighted neighbours of cells

In [37]:
%time df['mtcWNe'] = df.ix.apply(lambda x: w.cardinalities[x]) / df.tessellation.length

CPU times: user 202 ms, sys: 3.53 ms, total: 205 ms
Wall time: 198 ms


Area covered by neighbourhood

In [39]:
def area_covered(x, area):
    """Sum of ``df[area]`` over row ``x`` and its neighbours in ``w``.

    Parameters
    ----------
    x : int
        Position of the focal row; also the key looked up in ``w.neighbors``.
    area : str
        Name of the column holding the areas to add up.

    Returns
    -------
    float
        Total of the column over the focal row and its neighbours.
    """
    members = [x]
    members.extend(w.neighbors[x])
    return df[area].iloc[members].sum()

In [40]:
%time df['mdcAre'] = df.ix.apply(area_covered, args=('sdcAre',))

CPU times: user 26.9 s, sys: 2.64 s, total: 29.5 s
Wall time: 26.2 s


For some characters, we need a larger topological distance.

In [48]:
%time w3 = momepy.sw_high(k=3, weights=w)

CPU times: user 12.4 s, sys: 461 ms, total: 12.8 s
Wall time: 12.6 s


Mean interbuilding distance

In [51]:
%%time
# define adjacency list from libpysal
adj_list = w.to_adjlist()
# Distance between the buildings at both ends of every focal-neighbor pair.
# The ids from the weights are used as row positions (`iloc`), and both sides
# are reset to a default index so they line up row by row with `adj_list`.
adj_list["distance"] = (
    df.buildings.iloc[adj_list.focal]
    .reset_index(drop=True)
    .distance(df.buildings.iloc[adj_list.neighbor].reset_index(drop=True))
)
# Index by (focal, neighbor) so pairs can be selected by label later on.
adj_list = adj_list.set_index(['focal', 'neighbor'])

  "{} islands in this weights matrix. Conversion to an "


CPU times: user 3.7 s, sys: 6.67 ms, total: 3.7 s
Wall time: 3.62 s


In [64]:
def mean_interbuilding_distance(x):
    """Mean distance over adjacent building pairs around ``x``.

    The vicinity is ``x`` plus its neighbours in ``w3``; the result is the mean
    of ``adj_list.distance`` over all (focal, neighbor) pairs whose both ends
    lie in that vicinity.

    Parameters
    ----------
    x : int
        Id of the focal observation; the key looked up in ``w3.neighbors``.

    Returns
    -------
    float
        Mean distance, or NaN when none of the requested ids can be looked up
        in the adjacency list (e.g. an island, which has no pairs at all).
    """
    neighbours = [x]
    neighbours += w3.neighbors[x]
    try:
        return adj_list.distance.loc[neighbours, neighbours].mean()
    except KeyError:
        # Recent pandas raises on labels missing from the MultiIndex, which
        # happens for observations absent from the adjacency list. Keep the
        # result older pandas gave for an empty selection: NaN.
        return np.nan

In [65]:
%time df['ltbIBD'] = df.ix.apply(mean_interbuilding_distance)

  after removing the cwd from sys.path.


CPU times: user 30min 22s, sys: 33 s, total: 30min 55s
Wall time: 26min 41s


Weighted reached enclosures

In [55]:
def weighted_reached_enclosures(x, area, enclosure_id):
    """Number of distinct enclosures around ``x`` divided by the area covered.

    Parameters
    ----------
    x : int
        Position of the focal row; also the key looked up in ``w3.neighbors``.
    area : str
        Name of the column holding the areas.
    enclosure_id : str
        Name of the column holding the enclosure identifiers.

    Returns
    -------
    float
        Count of unique ``enclosure_id`` values among the focal row and its
        neighbours in ``w3``, divided by the sum of ``area`` over the same rows.
    """
    positions = [x, *w3.neighbors[x]]
    vicinity = df[[area, enclosure_id]].iloc[positions]
    reached = len(vicinity[enclosure_id].unique())
    return reached / vicinity[area].sum()

In [66]:
%time df['ltcWRE'] = df.ix.apply(weighted_reached_enclosures, args=('sdcAre', 'enclosureID'))

CPU times: user 3min 22s, sys: 13.8 s, total: 3min 36s
Wall time: 3min 10s
