# Generate enclosures for Great Britain

Connect to db:

In [1]:
import os

import geopandas as gpd

from sqlalchemy import create_engine

# Connection settings are read from the environment so that no credentials
# are stored in the notebook.
user = os.environ.get('DB_USER')
pwd = os.environ.get('DB_PWD')
host = os.environ.get('DB_HOST')
port = os.environ.get('DB_PORT')

# Fail early with a clear message instead of building a URL that contains
# the literal text "None" for an unset variable.
missing = [
    name
    for name, value in [('DB_USER', user), ('DB_PWD', pwd), ('DB_HOST', host), ('DB_PORT', port)]
    if value is None
]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")

# Use the "postgresql" dialect name: the legacy "postgres" alias was removed
# in SQLAlchemy 1.4 and no longer resolves to a dialect.
db_connection_url = f"postgresql+psycopg2://{user}:{pwd}@{host}:{port}/built_env"
engine = create_engine(db_connection_url)

Load initial data:

In [2]:
%%time
# Read every row of the `openroads_200803_topological` table into a GeoDataFrame,
# using the `geometry` column as the geometry.
sql = 'SELECT * FROM openroads_200803_topological'
roads = gpd.read_postgis(sql=sql, con=engine, geom_col='geometry')

CPU times: user 2min 7s, sys: 4.94 s, total: 2min 12s
Wall time: 2min 17s


In [3]:
%%time
# Keep only the road segments whose `roadStructure` is not 'Road In Tunnel'.
not_tunnel = roads['roadStructure'] != 'Road In Tunnel'
roads = roads[not_tunnel]

CPU times: user 1.65 s, sys: 18.1 ms, total: 1.67 s
Wall time: 1.66 s


In [4]:
%%time
# Read every row of the `gb_coastline_2016` table into a GeoDataFrame,
# using the `geometry` column as the geometry.
sql = 'SELECT * FROM gb_coastline_2016'
coastline = gpd.read_postgis(sql=sql, con=engine, geom_col='geometry')

CPU times: user 330 ms, sys: 7.78 ms, total: 337 ms
Wall time: 378 ms


## Generate enclosures

### First level

In [5]:
import pygeos
import pandas as pd

from shapely.ops import polygonize
from shapely.geometry import Point

from snap import line_to_line, close_gaps
from consolidate import topology

In [6]:
%%time
# Stack the road and coastline geometries into a single series of barriers.
barrier_sources = [roads.geometry, coastline.geometry]
barriers = pd.concat(barrier_sources)

CPU times: user 25.5 ms, sys: 0 ns, total: 25.5 ms
Wall time: 23 ms


In [7]:
%%time
# Merge all barrier geometries into one geometry, which is then fed to
# `polygonize`. This is the slowest step of the notebook (about 14 minutes
# in the recorded run).
unioned = barriers.unary_union

CPU times: user 14min 16s, sys: 7.33 s, total: 14min 23s
Wall time: 14min 13s


In [8]:
%%time
# Build polygons from the unioned barriers and store them as a geometry
# array that carries the CRS of the road network.
polygons = polygonize(unioned)
enclosures = gpd.array.from_shapely([*polygons], crs=roads.crs)

CPU times: user 1min 4s, sys: 482 ms, total: 1min 4s
Wall time: 1min 3s


### Additional barriers

In [9]:
%%time
# Read every row of the `openmap_railwaytrack_200824` table into a GeoDataFrame,
# using the `geometry` column as the geometry.
sql = 'SELECT * FROM openmap_railwaytrack_200824'
railway = gpd.read_postgis(sql=sql, con=engine, geom_col='geometry')

CPU times: user 2.95 s, sys: 15.8 ms, total: 2.97 s
Wall time: 3.06 s


In [10]:
%%time
# Read every row of the `openrivers_200909` table into a GeoDataFrame,
# using the `geometry` column as the geometry.
sql = 'SELECT * FROM openrivers_200909'
rivers = gpd.read_postgis(sql=sql, con=engine, geom_col='geometry')

CPU times: user 9.97 s, sys: 190 ms, total: 10.2 s
Wall time: 10.8 s


#### Preprocess railways

In [11]:
%%time
# Run the project's `topology` helper (imported from `consolidate`) on the raw
# railway tracks.
# NOTE(review): presumably this cleans up the topology of the line network —
# confirm against `consolidate.topology`.
railway_topo = topology(railway)

CPU times: user 4min 42s, sys: 7.19 ms, total: 4min 42s
Wall time: 4min 42s


In [12]:
%%time
# Run the `close_gaps` helper (imported from `snap`) on the railway network.
# NOTE(review): tolerance=25 is presumably a distance in CRS units — confirm
# against `snap.close_gaps`.
closed = close_gaps(railway_topo, tolerance=25)

CPU times: user 5min 8s, sys: 506 ms, total: 5min 8s
Wall time: 5min 7s


In [13]:
%%time
# Wrap the gap-closed geometries in a GeoDataFrame and run `topology` on them again.
closed_gdf = gpd.GeoDataFrame(geometry=closed)
closed_topo = topology(closed_gdf)

CPU times: user 10.6 s, sys: 3.96 ms, total: 10.6 s
Wall time: 10.6 s


In [14]:
%%time
# Run the `line_to_line` helper (imported from `snap`) on the railway lines
# with the road network as the second argument.
# NOTE(review): 25 is presumably a snapping/extension tolerance in CRS units —
# confirm against `snap.line_to_line`.
extended_topo = line_to_line(closed_topo, roads, 25)

CPU times: user 13.5 s, sys: 19.5 ms, total: 13.5 s
Wall time: 13.5 s


#### Subdivide enclosures

In [15]:
import itertools

import numpy as np
import dask.bag as db

from tqdm.notebook import tqdm
from dask.distributed import Client
from geopandas._vectorized import _pygeos_to_shapely

In [16]:
%%time
# Stack the river geometries and the preprocessed railway geometries into one
# series of additional barriers.
additional_sources = [rivers.geometry, extended_topo.geometry]
additional = pd.concat(additional_sources)

CPU times: user 3.93 ms, sys: 36 µs, total: 3.96 ms
Wall time: 2.6 ms


In [17]:
%%time
# Query the spatial index of the enclosures with every additional barrier.
# `inp` holds positions in `additional` and `res` the positions of the
# intersecting enclosures.
enclosure_series = gpd.GeoSeries(enclosures)
sindex = enclosure_series.sindex
inp, res = sindex.query_bulk(additional.geometry, predicate='intersects')

CPU times: user 27.8 s, sys: 43.9 ms, total: 27.9 s
Wall time: 27.7 s


In [18]:
%%time
# Sorted positions of the enclosures that are intersected by at least one
# additional barrier; only these need to be subdivided.
unique = np.unique(res)

CPU times: user 12.2 ms, sys: 2 µs, total: 12.2 ms
Wall time: 10.6 ms


##### Loop option

To find out how long a plain loop takes and whether it is worth trying to fix dask.

In [19]:
%%time
# Subdivide every enclosure that is crossed by an additional barrier.
# The resulting polygons are collected in `new`.

# Loop invariants: the raw geometry arrays behind the enclosures and the
# additional barriers.
enclosure_geoms = enclosures.data
additional_geoms = additional.values.data

# Group the hits by enclosure once instead of scanning `res` with a boolean
# mask on every iteration. The stable sort keeps each group in the same order
# that `inp[res == i]` would produce. Because `unique` is the sorted set of
# values in `res`, `searchsorted` returns the start of each group.
order = np.argsort(res, kind='stable')
group_starts = np.searchsorted(res[order], unique)
crossing_groups = np.split(inp[order], group_starts[1:])

new = []

for i, crossing in tqdm(zip(unique, crossing_groups), total=len(unique)):
    poly = enclosure_geoms[i]
    # NOTE(review): the 0.01 buffer presumably adds tolerance so that pieces
    # lying on the boundary pass the `covered_by` test below — confirm.
    buf = pygeos.buffer(poly, 0.01)
    # Clip the crossing barriers to the (buffered) enclosure.
    crossing_ins = pygeos.intersection(buf, additional_geoms[crossing])
    # Union the clipped barriers with the enclosure boundary, then polygonize.
    union = pygeos.union_all(np.append(crossing_ins, pygeos.boundary(poly)))
    polygons = np.array(list(polygonize(_pygeos_to_shapely(union))))
    # Keep only the polygons that lie inside the buffered enclosure.
    within = pygeos.covered_by(pygeos.from_shapely(polygons), buf)
    new += list(polygons[within])

HBox(children=(FloatProgress(value=0.0, max=66823.0), HTML(value='')))


CPU times: user 3min 7s, sys: 607 ms, total: 3min 7s
Wall time: 3min 6s


In [22]:
%%time
# Replace the subdivided enclosures with their new parts: drop the originals
# listed in `unique` and add the polygons collected in `new`.
# `Series.append` was removed in pandas 2.0, so `pd.concat` is used instead.
# The new polygons get the same CRS that the enclosures were created with, so
# both inputs agree. The index is left as-is (the new polygons restart at 0),
# matching the previous result.
final_enclosures = pd.concat(
    [
        gpd.GeoSeries(enclosures).drop(unique),
        gpd.GeoSeries(new, crs=roads.crs),
    ]
)

CPU times: user 334 ms, sys: 5 µs, total: 334 ms
Wall time: 331 ms


In [23]:
# Number of enclosures after subdivision by the additional barriers.
final_enclosures.shape

(735372,)

In [24]:
# Number of first-level enclosures, for comparison with the subdivided result.
enclosures.shape

(619191,)

In [25]:
%%time
# Wrap the final enclosures in a GeoDataFrame with the road network's CRS and
# write them to a Parquet file.
enclosures_gdf = gpd.GeoDataFrame(geometry=final_enclosures, crs=roads.crs)
enclosures_gdf.to_parquet('../../urbangrammar_samba/enclosures.pq')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  """Entry point for launching an IPython kernel.


CPU times: user 4.81 s, sys: 626 ms, total: 5.43 s
Wall time: 6.81 s
