In [28]:
def generate_tess(buildings, enclosures, n_workers=1):
    tessellation = mm.enclosed_tessellation(
        buildings, enclosures.geometry, simplify=True, n_jobs=n_workers
    )
    # deal with split buildings
    tessellation = tessellation.dissolve(by=tessellation.index.values)

    # drop empty spaces with no buildings and a positive index,
    # leave negatives in the geodataframe
    tessellation = tessellation.explode()
    inp, res = buildings.geometry.centroid.sindex.query(tessellation.geometry)
    to_keep = np.append(np.unique(inp), np.where(tessellation.index.values < 0)[0])
    tessellation = tessellation.iloc[to_keep]

    ### drop any remaining duplicates
    ## sometimes -1s have multiple tesselation cells
    tessellation = tessellation[~tessellation.index.duplicated()].sort_index()
    return tessellation

In [29]:
import datetime
import glob

import geopandas as gpd
import momepy as mm
import numpy as np
import shapely
from libpysal.graph import Graph

regions_datadir = "/data/uscuni-ulce/"
data_dir = "/data/uscuni-ulce/processed_data/"
eubucco_files = glob.glob(regions_datadir + "eubucco_raw/*")
graph_dir = data_dir + "neigh_graphs/"

In [30]:
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [31]:
# 12199 - hills, small test
# 69300 - prague medium
# 226 - germany somewhere, largest cluster
# 106928 + 1 - big one in poland
for region_id, region_hull in region_hulls.iterrows():
    if region_id < 10:
        continue
    break
region_id

10

In [32]:
%%time
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")
buildings_limit = mm.buffered_limit(buildings, buffer="adaptive")
enclosures = mm.enclosures(streets, limit=buildings_limit)
tesselations = generate_tess(buildings, enclosures, n_workers=-1)


### there are some edge cases for long and narrow buildings and
## completely wrong polygons that are dropped by voronoi_frames
## region 10 has this problem
tesselation_coverage = np.isin(buildings.index.values, tesselations.index.values)

CPU times: user 1min 15s, sys: 1.97 s, total: 1min 17s
Wall time: 1min 39s


In [34]:
assert tesselation_coverage.all()

In [14]:
print("----", "Processing region: ", region_id, datetime.datetime.now())
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")

---- Processing region:  12199 2024-06-14 17:13:51.993366


In [15]:
buildings_limit = mm.buffered_limit(buildings, buffer="adaptive")
enclosures = mm.enclosures(streets, limit=buildings_limit)

In [16]:
n_workers = -1
tessellation = mm.enclosed_tessellation(
    buildings, enclosures.geometry, simplify=True, n_jobs=n_workers
)
# deal with split buildings
tessellation = tessellation.dissolve(by=tessellation.index.values)

# drop empty spaces with no buildings and a positive index,
# leave negatives in the geodataframe
tessellation = tessellation.explode()
inp, res = buildings.geometry.centroid.sindex.query(tessellation.geometry)
to_keep = np.append(np.unique(inp), np.where(tessellation.index.values < 0)[0])
tessellation = tessellation.iloc[to_keep]

### drop any remaining duplicates
## sometimes -1s have multiple tesselation cells
tessellation = tessellation[~tessellation.index.duplicated()].sort_index()

In [17]:
graph = Graph.build_fuzzy_contiguity(tessellation, buffer=1e-6).assign_self_weight()

In [26]:
graph.to_parquet(
    data_dir + "neigh_graphs/" + f"enclosure_graph_{region_id}_knn1.parquet"
)

In [21]:
subgraph = graph.subgraph(tessellation.index[tessellation.index >= 0].values)

In [24]:
# m = enclosures.explore()
# m = subgraph.explore(tessellation, m=m)
# m

In [None]:

shapely.__version__

In [None]:
from packaging.version import Version

In [None]:
Version(shapely.__version__) >= Version("2.1.0dev")

In [1]:
import geopandas as gpd
import momepy as mm
import numpy as np
from libpysal.graph import Graph

In [2]:
test_file_path = "./prg_geometry.gpkg"
import fiona

fiona.listlayers(test_file_path)

['buildings', 'nodes', 'edges', 'tessellation', 'blocks']

In [3]:
%%time
# example data
df_streets = gpd.read_file(test_file_path, layer="edges")
df_buildings = gpd.read_file(test_file_path, layer="buildings")

CPU times: user 6.9 s, sys: 32.2 ms, total: 6.94 s
Wall time: 6.93 s


In [4]:
%%time
check = mm.CheckTessellationInput(df_buildings)
check

Collapsed features  : 0
Split features      : 291
Overlapping features: 0
CPU times: user 4.09 s, sys: 60 ms, total: 4.15 s
Wall time: 4.15 s


<momepy.preprocessing.CheckTessellationInput at 0x71116247fa40>

In [5]:
limit = mm.buffered_limit(df_buildings, 100)

In [6]:
%%time
tessellation = mm.morphological_tessellation(df_buildings, limit)

CPU times: user 2min 39s, sys: 4.04 s, total: 2min 43s
Wall time: 2min 43s


In [7]:
tessellation = tessellation.rename("geometry").to_frame()

In [8]:
%%time
extended = mm.extend_lines(
    df_streets,
    tolerance=120,
    target=gpd.GeoSeries([limit.boundary]),
    barrier=df_buildings,
)

CPU times: user 2.45 s, sys: 48 µs, total: 2.45 s
Wall time: 2.45 s


In [9]:
%%time
(blocks_df, buildings_bid, tessellation_bid) = mm.generate_blocks(
    tessellation, edges=extended, buildings=df_buildings
)

CPU times: user 1min 28s, sys: 1.24 s, total: 1min 30s
Wall time: 1min 30s


In [None]:
(blocks_df, buildings_bid, tessellation_bid) = mm.generate_blocks(
    tessellation, df_streets, buildings=df_buildings
)

In [None]:
blocks_df

In [36]:
df_buildings["bID"] = buildings_bid  # get block ID
tessellation["bID"] = tessellation_bid  # get block ID

In [29]:
%%time
df_streets["nID"] = range(len(df_streets))
df_buildings["nID"] = mm.get_network_id(
    df_buildings, df_streets, "nID", min_size=300, verbose=False
)

CPU times: user 1min, sys: 11.9 ms, total: 1min 1s
Wall time: 1min




In [37]:
tessellation = tessellation.merge(
    df_buildings[["nID"]], right_index=True, left_index=True, how="left"
)

In [None]:
### save to file

In [39]:
path = "./data/new_geometry.gpkg"
tessellation.to_file(path, layer="tessellation", driver="GPKG")
df_buildings.to_file(path, layer="buildings", driver="GPKG")
blocks_df.to_file(path, layer="blocks", driver="GPKG")
df_streets.to_file(path, layer="streets", driver="GPKG")

In [None]:
## compare to oo version - does not work - the two tessalation functions return different results

In [22]:
%%time
test_file_path = "./prg_geometry.gpkg"
old_df_streets = gpd.read_file(test_file_path, layer="edges")
old_df_buildings = gpd.read_file(test_file_path, layer="buildings")
old_df_blocks = gpd.read_file(test_file_path, layer="blocks")
old_df_tessellation = gpd.read_file(test_file_path, layer="tessellation")

CPU times: user 23.4 s, sys: 80 ms, total: 23.5 s
Wall time: 23.5 s


In [23]:
old_df_streets["nID"] = mm.unique_id(old_df_streets)

In [24]:
old_df_buildings["nID"] = mm.get_network_id(
    old_df_buildings, old_df_streets, "nID", min_size=300, verbose=False
)

  old_df_buildings['nID'] = mm.get_network_id(old_df_buildings,


In [25]:
%%time
limit = mm.buffered_limit(old_df_buildings, 100)
extended = mm.extend_lines(
    old_df_streets,
    tolerance=120,
    target=gpd.GeoSeries([limit.boundary]),
    barrier=old_df_buildings,
)
blocks = mm.Blocks(old_df_tessellation, extended, old_df_buildings, "bID", "uID")
old_df_buildings["bID"] = blocks.buildings_id
old_df_tessellation["bID"] = blocks.tessellation_id
blocks = blocks.blocks

CPU times: user 1min 41s, sys: 1.48 s, total: 1min 42s
Wall time: 1min 42s


In [26]:
blocks.shape

(7220, 2)

In [27]:
%%time
new_file_path = "./data/new_geometry.gpkg"
new_df_streets = gpd.read_file(new_file_path, layer="streets")
new_df_buildings = gpd.read_file(new_file_path, layer="buildings")
new_df_blocks = gpd.read_file(new_file_path, layer="blocks")
new_df_tessellation = gpd.read_file(new_file_path, layer="tessellation")

CPU times: user 24.3 s, sys: 92.4 ms, total: 24.4 s
Wall time: 24.3 s


In [28]:
from geopandas.testing import assert_geodataframe_equal

In [37]:
assert_geodataframe_equal(new_df_streets, old_df_streets)
assert_geodataframe_equal(new_df_buildings, old_df_buildings)
assert_geodataframe_equal(new_df_blocks, old_df_blocks)
assert_geodataframe_equal(new_df_tessellation, old_df_tessellation)

AssertionError: GeoDataFrame.columns are different

GeoDataFrame.columns values are different (50.0 %)
[left]:  Index(['uID', 'bID', 'nID', 'geometry'], dtype='object')
[right]: Index(['uID', 'geometry', 'nID', 'bID'], dtype='object')