In [4]:
# import momepy as mm
# import numpy as np
# import pandas as pd
# from libpysal.graph import Graph
# import geopandas as gpd
# import pytest

In [5]:
# %%time
# new_file_path = './data/new_geometry.gpkg'
# new_df_streets = gpd.read_file(new_file_path, layer='streets')
# new_df_buildings = gpd.read_file(new_file_path, layer="buildings")
# new_df_blocks = gpd.read_file(new_file_path, layer="blocks")
# new_df_tessellation = gpd.read_file(new_file_path, layer="tessellation")


# buildings = new_df_buildings
# streets = new_df_streets
# blocks = new_df_blocks
# tessellation = new_df_tessellation

In [50]:
import gc
import glob

import geopandas as gpd
import momepy as mm
import numpy as np
import pandas as pd
import shapely
from libpysal.graph import Graph, read_parquet
from utils import lazy_higher_order, partial_apply

regions_datadir = "/data/uscuni-ulce/"
data_dir = "/data/uscuni-ulce/processed_data/"
eubucco_files = glob.glob(regions_datadir + "eubucco_raw/*")
graph_dir = data_dir + "neigh_graphs/"
chars_dir = "/data/uscuni-ulce/processed_data/chars/"

In [51]:
def check_available():
    elements = ["buildings", "enclosures", "tessellations", "nodes", "streets"]
    for el in elements:
        el_ids = [
            int(re.findall(r"\d+", f)[0])
            for f in glob.glob(chars_dir + f"{el}/*.parquet")
        ]
        missing = np.setdiff1d(region_hulls.index.values, el_ids)
        print(f"Missing {el} for regions {missing}")


check_available()

problem_regions = [66000, 34392]

Missing buildings for regions [ 34392  55713  62929  66000 107685 115457]
Missing enclosures for regions [ 55713  62929 115457]
Missing tessellations for regions [ 34392  55713  62929  66000 107685 115457]
Missing nodes for regions [  3607   8754  16501  55713  62929 107685 115457]
Missing streets for regions [  3607   8754  16501  55713  62929 107685 115457]


In [13]:
building_region_mapping = pd.read_parquet(
    regions_datadir + "regions/" + "id_to_region.parquet", engine="pyarrow"
)
counts = building_region_mapping.groupby("region")["id"].size()
del building_region_mapping

In [22]:
large_regions = counts[counts > 6e5].index

In [23]:
large_regions

Index([4, 226, 3607, 8754, 16501, 55713, 62929, 107685, 115457], dtype='int64', name='region')

In [52]:
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [54]:
# 12199 - hills, small test
# 69300 - prague medium
# 226 - germany somewhere, largest cluster

for region_id, region_hull in region_hulls.iterrows():
    if region_id != 34392:
        continue

    break
region_id

34392

### Streets & Nodes

In [57]:
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")

graph = mm.gdf_to_nx(streets)
graph = mm.node_degree(graph)
graph = mm.subgraph(
    graph,
    radius=5,
    meshedness=True,
    cds_length=False,
    mode="sum",
    degree="degree",
    length="mm_len",
    mean_node_degree=False,
    proportion={0: True, 3: True, 4: True},
    cyclomatic=False,
    edge_node_ratio=False,
    gamma=False,
    local_closeness=True,
    closeness_weight="mm_len",
    verbose=False,
)
graph = mm.cds_length(graph, radius=3, name="ldsCDL", verbose=False)
graph = mm.clustering(graph, name="xcnSCl")
graph = mm.mean_node_dist(graph, name="mtdMDi", verbose=False)

nodes, edges = mm.nx_to_gdf(graph, spatial_weights=False)

edges["sdsLen"] = streets.geometry.length
street_orientation = mm.orientation(streets)
edges["sssLin"] = mm.linearity(streets)

In [None]:
str_q1 = read_parquet(graph_dir + f"street_graph_{region_id}_knn1.parquet")


def mean_edge_length(partical_focals, partial_higher, y):
    return partial_higher.describe(
        y.loc[partial_higher.unique_ids], statistics=["mean"]
    )["mean"]


edges["ldsMSL"] = partial_apply(
    str_q1,
    higher_order_k=3,
    n_splits=10,
    func=mean_edge_length,
    y=edges.geometry.length,
)

In [None]:
edges_w3 = str_q1.higher_order(k=3, diagonal=True, lower_order=True)

In [None]:
r = edges_w3.describe(edges.geometry.length, statistics=["mean"])["mean"]
assert np.allclose(edges["ldsMSL"].values, r.values)

In [10]:
## tesselation street interactions
tessellation = gpd.read_parquet(
    data_dir + f"/tessellations/tessellation_{region_id}.parquet"
)
# tessellation to street
tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)
streets["sdsAre"] = mm.describe_agg(
    tessellation.geometry.area, tess_nid, streets.index, statistics=["sum", "count"]
)["sum"]

  tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)


In [11]:
from utils import partial_describe_reached_agg

In [12]:
res = partial_describe_reached_agg(
    tessellation.geometry.area,
    tess_nid,
    str_q1,
    higher_order=3,
    n_splits=10,
    q=None,
    statistics=["sum", "count"],
)

edges["ldsRea"] = res["count"]
edges["ldsAre"] = res["sum"]

In [13]:
from pandas.testing import assert_frame_equal

In [14]:
expected_res = mm.describe_reached_agg(
    tessellation.geometry.area, tess_nid, graph=edges_w3, statistics=["sum", "count"]
)

In [15]:
assert_frame_equal(expected_res, res, check_names=False)

In [16]:
%%time
## street building interactions
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")

profile = mm.street_profile(streets, buildings, height=None, distance=3)
edges["sdsSPW"] = profile["width"]
edges["sdsSPO"] = profile["openness"]
edges["sdsSWD"] = profile["width_deviation"]

CPU times: user 1min 1s, sys: 808 ms, total: 1min 2s
Wall time: 1min 2s


In [21]:
node_graph = read_parquet(graph_dir + f"nodes_graph_{region_id}_knn1.parquet")
higher_order = 5
graph = node_graph
n_splits = 2

In [22]:
res = pd.Series(np.nan, index=graph.unique_ids)

for partial_higher in lazy_higher_order(graph, k=higher_order, n_splits=n_splits):
    partial_focals = np.setdiff1d(partial_higher.unique_ids, partial_higher.isolates)
    break

In [28]:
def partial_node_density(partial_focals, partial_higher, nodes, edges, weighted):
    return mm.node_density(
        nodes.loc[partial_higher.unique_ids], edges, partial_higher, weighted
    )

In [35]:
%%time
res = partial_apply(
    node_graph,
    higher_order_k=higher_order,
    n_splits=n_splits,
    func=partial_node_density,
    nodes=nodes,
    edges=edges,
    weighted=False,
)

CPU times: user 1min 1s, sys: 4.01 ms, total: 1min 1s
Wall time: 1min 1s


In [38]:
%%time
nodes5 = node_graph.higher_order(k=5, lower_order=True, diagonal=True)

CPU times: user 662 ms, sys: 15 µs, total: 662 ms
Wall time: 662 ms


In [39]:
%%time
expected_res = mm.node_density(nodes, edges, nodes5, False)

CPU times: user 50.7 s, sys: 0 ns, total: 50.7 s
Wall time: 50.7 s


In [40]:
from pandas.testing import assert_series_equal

In [41]:
assert_series_equal(res, expected_res)

### Enclosures

In [42]:
print("Processing enclosures")
enclosures = gpd.read_parquet(data_dir + f"/enclosures/enclosure_{region_id}.parquet")
enclosures["ldkAre"] = enclosures.geometry.area
enclosures["ldkPer"] = enclosures.geometry.length
enclosures["lskCCo"] = mm.circular_compactness(enclosures)
enclosures["lskERI"] = mm.equivalent_rectangular_index(enclosures)
enclosures["lskCWA"] = mm.compactness_weighted_axis(enclosures)
enclosures["ltkOri"] = mm.orientation(enclosures)

blo_q1 = read_parquet(graph_dir + f"enclosure_graph_{region_id}_knn1.parquet")
enclosures["ltkWNB"] = mm.neighbors(enclosures, blo_q1, weighted=True)

Processing enclosures


In [44]:
# ## buildings enclosures interactions
# buildings = gpd.read_parquet(data_dir + f'/buildings/buildings_{region_id}.parquet')
# tessellation = gpd.read_parquet(data_dir + f'/tessellations/tessellation_{region_id}.parquet')

In [46]:
beid = buildings.merge(
    tessellation["enclosure_index"], left_index=True, right_index=True
)["enclosure_index"]

res = mm.describe_agg(
    buildings.geometry.area,
    beid,
    result_index=enclosures.index,
    statistics=["count", "sum"],
)

enclosures["likWBB"] = res["sum"] / enclosures.geometry.area

### Buildings

In [144]:
%%time
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")

buildings["sdbAre"] = buildings.geometry.area
buildings["sdbPer"] = buildings.geometry.length
buildings["sdbCoA"] = mm.courtyard_area(buildings.geometry)
buildings["ssbCCo"] = mm.circular_compactness(buildings)
buildings["ssbCor"] = mm.corners(buildings.geometry)
buildings["ssbSqu"] = mm.squareness(buildings.geometry)
buildings["ssbERI"] = mm.equivalent_rectangular_index(buildings.geometry)
buildings["ssbElo"] = mm.elongation(buildings.geometry)

cencon = mm.centroid_corner_distance(buildings)
buildings["ssbCCM"] = cencon["mean"]
buildings["ssbCCD"] = cencon["std"]
buildings["stbOri"] = mm.orientation(buildings)
# buildings["mtbSWR"] = mm.shared_walls(buildings) /  buildings.geometry.length

buildings_q1 = read_parquet(graph_dir + f"building_graph_{region_id}_knn1.parquet")
buildings["libNCo"] = mm.courtyards(buildings, buildings_q1)
buildings["ldbPWL"] = mm.perimeter_wall(buildings, buildings_q1)

  angles = np.arccos(cosine_angle)
  cosine_angle = np.sum(ba * bc, axis=1) / (
  angles = np.arccos(cosine_angle)
  cosine_angle = np.sum(ba * bc, axis=1) / (
  angles = np.arccos(cosine_angle)
  cosine_angle = np.sum(ba * bc, axis=1) / (


CPU times: user 3min 7s, sys: 1.51 s, total: 3min 8s
Wall time: 3min 8s


In [61]:
buildings.is_valid.all()

True

In [96]:
# buildings.iloc[85_250:85_300].explore()

In [134]:
buildings.iloc[85_280:85_290].to_parquet("problem_buildings.parquet")

In [135]:
# buildings.set_precision(0).iloc[85_280:85_290].explore()

In [125]:
mm.shared_walls(buildings.iloc[85_280:85_290].set_precision(1e-20)).sum()

0.0

In [142]:
problem_buildings = buildings.iloc[85_280:85_290]
problem_buildings.loc[:, "geometry"] = problem_buildings.make_valid()

In [None]:
problem_buildings

In [143]:
mm.shared_walls(problem_buildings).sum()

GEOSException: TopologyException: side location conflict at 4393635.0975778354 2932538.141627443. This can occur if the input geometry is invalid.

In [145]:
queen_1 = read_parquet(graph_dir + f"tessellation_graph_{region_id}_knn1.parquet")
bgraph = queen_1.subgraph(buildings_q1.unique_ids)

In [146]:
buildings["ltcBuA"] = mm.building_adjacency(buildings_q1, bgraph)

In [147]:
buildings["mtbAli"] = mm.alignment(buildings["stbOri"], bgraph)
buildings["mtbNDi"] = mm.neighbor_distance(buildings, bgraph)

In [148]:
def partial_mean_intb_dist(partial_focals, partial_higher, buildings, bgraph):
    pos_unique_higher = partial_higher.unique_ids
    pos_unique_higher = pos_unique_higher[pos_unique_higher >= 0]
    partial_buildings = buildings.loc[pos_unique_higher]
    partial_bgraph = bgraph.subgraph(partial_buildings.index.values)
    partial_bgraph3 = partial_higher.subgraph(partial_buildings.index.values)

    res = pd.Series(np.nan, index=partial_higher.unique_ids)
    mibd = mm.mean_interbuilding_distance(
        buildings.loc[pos_unique_higher], partial_bgraph, partial_bgraph3
    )
    res.loc[mibd.index] = mibd.values
    return res

In [149]:
%%time
res = partial_apply(
    graph=queen_1,
    higher_order_k=3,
    n_splits=20,
    func=partial_mean_intb_dist,
    buildings=buildings,
    bgraph=bgraph,
)
buildings["ltbIBD"] = res[res.index >= 0]

CPU times: user 3min 6s, sys: 110 ms, total: 3min 6s
Wall time: 3min 6s


In [82]:
# # higher = queen_1.higher_order(k=3, lower_order=True, diagonal=True)

# bgraph3 = higher.subgraph(buildings.index.values)
# expected_res = mm.mean_interbuilding_distance(buildings, bgraph, bgraph3)

In [83]:
assert_series_equal(expected_res, buildings["ltbIBD"], check_names=False)

In [84]:
del bgraph
gc.collect()

2710

In [150]:
tessellation = gpd.read_parquet(
    data_dir + f"/tessellations/tessellation_{region_id}.parquet"
)
tessellation["stcOri"] = mm.orientation(tessellation)
buildings["stbCeA"] = mm.cell_alignment(
    buildings["stbOri"], tessellation[tessellation.index >= 0]["stcOri"]
)

In [151]:
## building streets interactions
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")
graph = mm.gdf_to_nx(streets)
nodes, edges = mm.nx_to_gdf(graph, spatial_weights=False)
tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)
blg_nid = tess_nid[tess_nid.index >= 0]
street_orientation = mm.orientation(streets)
buildings["nID"] = blg_nid
edges["nID"] = edges.index.values
buildings["stbSAl"] = mm.street_alignment(
    buildings["stbOri"][~blg_nid.isna()], street_orientation, blg_nid[~blg_nid.isna()]
)

buildings["nodeID"] = mm.get_node_id(
    buildings, nodes, edges, "nodeID", "nID", verbose=False
)

  tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)


### Tessellations

In [152]:
tessellation = gpd.read_parquet(
    data_dir + f"/tessellations/tessellation_{region_id}.parquet"
)

tessellation["stcOri"] = mm.orientation(tessellation)
tessellation["sdcLAL"] = mm.longest_axis_length(tessellation)
tessellation["sdcAre"] = tessellation.geometry.area
tessellation["sscCCo"] = mm.circular_compactness(tessellation)
tessellation["sscERI"] = mm.equivalent_rectangular_index(tessellation.geometry)

queen_1 = read_parquet(graph_dir + f"tessellation_graph_{region_id}_knn1.parquet")
tessellation["mtcWNe"] = mm.neighbors(tessellation, queen_1, weighted=True)
tessellation["mdcAre"] = queen_1.describe(
    tessellation.geometry.area, statistics=["sum"]
)["sum"]

In [153]:
def partial_block_count(partial_focal, partial_higher, y):
    return partial_higher.describe(
        y.loc[partial_higher.unique_ids], statistics=["nunique"]
    )["nunique"]

In [154]:
def partial_block_count(partial_focal, partial_higher, y):
    return partial_higher.describe(
        y.loc[partial_higher.unique_ids], statistics=["nunique"]
    )["nunique"]


tessellation["ltcWRB"] = partial_apply(
    queen_1,
    higher_order_k=3,
    n_splits=10,
    func=partial_block_count,
    y=tessellation["enclosure_index"],
)

In [156]:
# expected_res = higher.describe(tessellation['enclosure_index'], statistics=['nunique'])['nunique']

In [157]:
# assert_series_equal(expected_res, tessellation['ltcWRB'], check_names=False, check_dtype=False)

In [158]:
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")

In [159]:
tessellation["sicCAR"] = buildings.geometry.area / tessellation.geometry.area

In [160]:
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")
street_orientation = mm.orientation(streets)
graph = mm.gdf_to_nx(streets)
nodes, edges = mm.nx_to_gdf(graph, spatial_weights=False)
tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)

  tess_nid = mm.get_network_id(tessellation, edges, network_id=edges.index, verbose=False)


In [161]:
tessellation["stcSAl"] = mm.street_alignment(
    tessellation["stcOri"][~tess_nid.isna()],
    street_orientation,
    tess_nid[~tess_nid.isna()].astype(int).values,
)

In [162]:
edges["nID"] = edges.index.values
buildings["nID"] = tess_nid[tess_nid.index >= 0]
tessellation["nodeID"] = mm.get_node_id(
    buildings, nodes, edges, "nodeID", "nID", verbose=False
)

### Merging data

In [148]:
region_id = 10

In [149]:
tessellation = gpd.read_parquet(chars_dir + f"tessellations/chars_{region_id}.parquet")
buildings = gpd.read_parquet(chars_dir + f"buildings/chars_{region_id}.parquet")
enclosures = gpd.read_parquet(chars_dir + f"enclosures/chars_{region_id}.parquet")
streets = gpd.read_parquet(chars_dir + f"streets/chars_{region_id}.parquet")
nodes = gpd.read_parquet(chars_dir + f"nodes/chars_{region_id}.parquet")

In [150]:
merged = pd.merge(
    tessellation.drop(columns=["geometry"]),
    buildings.drop(columns=["nodeID", "geometry"]),
    right_index=True,
    left_index=True,
)

merged = merged.merge(
    enclosures.drop(columns="geometry"),
    right_on="eID",
    left_on="enclosure_index",
    how="left",
)

merged = merged.merge(streets.drop(columns="geometry"), on="nID", how="left")
merged = merged.merge(nodes.drop(columns="geometry"), on="nodeID", how="left")

In [151]:
primary = merged.drop(
    columns=[
        "nID",
        "eID",
        "nodeID",
        "mm_len",
        "cdsbool",
        "node_start",
        "node_end",
        "x",
        "y",
        "enclosure_index",
        "id",  ## maybe keep
    ]
)

In [152]:
primary.to_parquet(chars_dir + f"primary_chars/chars_{region_id}.parquet")

In [169]:
## context lag

In [170]:
queen_3

<libpysal.graph.base.Graph at 0x75c488f65730>

In [None]:
import scipy as sp

skewness = pd.DataFrame(index=chars)
for c in chars:
    skewness.loc[c, "skewness"] = sp.stats.skew(primary[c])
headtail = list(skewness.loc[skewness.skewness >= 1].index)
to_invert = skewness.loc[skewness.skewness <= -1].index

In [193]:
gdf = primary.reset_index(drop=True)
for inv in to_invert:
    gdf[inv + "_r"] = gdf[inv].max() - gdf[inv]
inverted = [x for x in gdf.columns if "_r" in x]
headtail = headtail + inverted
natural = [x for x in chars if x not in headtail]

In [None]:
## compute all functions once, for each neighbour set
## if its still slow manually loop over it

In [196]:
%%time

res = {}

for c in primary.columns:
    gini = mm.gini(primary[c], queen_3)
    theil = mm.theil(primary[c], queen_3)
    vals_range = mm.values_range(primary[c], queen_3)

    res[c + "_gini"] = gini
    res[c + "_theil"] = theil
    res[c + "_range"] = vals_range

  return (r_x - n_x_sum - x_sum) / n_x_sum


ValueError: Values contain negative numbers. Normalise data beforeusing momepy.Gini.

### Tessellation fixing

In [83]:
region_id = 10
n_workers = -1

In [76]:
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")
enclosures = gpd.read_parquet(data_dir + f"/enclosures/enclosure_{region_id}.parquet")

In [7]:
%%time
tesselations = mm.enclosed_tessellation(buildings, enclosures.geometry, n_jobs=-1)

CPU times: user 31.7 s, sys: 1.28 s, total: 32.9 s
Wall time: 52.8 s


In [8]:
problem_buildings = buildings[
    ~np.isin(buildings.index.values, tesselations.index.values)
]
problem_buildings.shape

(4, 2)

In [59]:
problems = mm.CheckTessellationInput(problem_buildings)
problems.split

Collapsed features  : 0
Split features      : 2
Overlapping features: 0


Unnamed: 0,id,geometry
24618,v0.1-DEU.10.47.1.1_1-71112,"POLYGON ((4052371.706 3075630.179, 4052371.707..."
45001,v0.1-DEU.10.47.1.1_1-6152,"POLYGON ((4044792.222 3079260.545, 4044792.4 3..."


In [21]:
problem_buildings

Unnamed: 0,id,geometry
24618,v0.1-DEU.10.47.1.1_1-71112,"POLYGON ((4052371.706 3075630.179, 4052371.707..."
24624,v0.1-DEU.10.47.1.1_1-71412,"POLYGON ((4052382.985 3075622.32, 4052382.986 ..."
45001,v0.1-DEU.10.47.1.1_1-6152,"POLYGON ((4044792.222 3079260.545, 4044792.4 3..."
105660,v0.1-DEU.10.9.2.1_1-1292,"POLYGON ((4073046.554 3082946.121, 4073046.554..."


In [65]:
inp[res == 105660]

array([1079])

In [12]:
%%time
inp, res = buildings.geometry.sindex.query(enclosures.geometry, predicate="intersects")

CPU times: user 1.21 s, sys: 0 ns, total: 1.21 s
Wall time: 1.21 s


In [17]:
# find out which enclosures contain one and multiple buildings
unique, counts = np.unique(inp, return_counts=True)
splits = unique[counts > 1]
single = unique[counts == 1]
altered = unique[counts > 0]

In [66]:
np.where(splits == 1079)

(array([452]),)

In [18]:
# prepare input for parallel processing
tuples = [
    (
        enclosures.index[i],  # enclosure index
        enclosures.geometry.iloc[i],  # enclosure geometry
        buildings.iloc[res[inp == i]],  # buildings within the enclosure
    )
    for i in splits
]

In [67]:
tuples[452]

(1079,
 <POLYGON ((4072934.523 3082912.177, 4072936.229 3082925.284, 4072936.623 308...>,
                               id  \
 105657  v0.1-DEU.10.9.2.1_1-1432   
 105659  v0.1-DEU.10.9.2.1_1-1577   
 105661  v0.1-DEU.10.9.2.1_1-1716   
 105660  v0.1-DEU.10.9.2.1_1-1292   
 105663  v0.1-DEU.10.9.2.1_1-1054   
 105665   v0.1-DEU.10.9.2.1_1-879   
 105664  v0.1-DEU.10.9.2.1_1-1809   
 105667  v0.1-DEU.10.9.2.1_1-1471   
 105666  v0.1-DEU.10.9.2.1_1-1460   
 105670  v0.1-DEU.10.9.2.1_1-1256   
 105705  v0.1-DEU.10.9.2.1_1-1280   
 88664   v0.1-DEU.10.9.2.1_1-2191   
 105722  v0.1-DEU.10.9.2.1_1-2226   
 88662   v0.1-DEU.10.9.2.1_1-1626   
 105721  v0.1-DEU.10.9.2.1_1-2158   
 88663    v0.1-DEU.10.9.2.1_1-892   
 
                                                  geometry  
 105657  POLYGON ((4072992.406 3082940.332, 4072992.407...  
 105659  POLYGON ((4072949.456 3082929.05, 4072949.455 ...  
 105661  POLYGON ((4073055.989 3082956.858, 4073055.989...  
 105660  POLYGON ((4073046.554 3082

In [68]:
threshold = 0.05
shrink = 0.4
segment = 0.5
enclosure_id = "eID"

In [69]:
from libpysal.cg import voronoi_frames


def _tess(ix, poly, blg, threshold, shrink, segment, enclosure_id):
    """Generate tessellation for a single enclosure. Helper for enclosed_tessellation"""
    # check if threshold is set and filter buildings based on the threshold
    if threshold:
        blg = blg[
            shapely.area(shapely.intersection(blg.geometry.array, poly))
            > (shapely.area(blg.geometry.array) * threshold)
        ]
    print(blg.shape)
    if len(blg) >= 1:
        tess = voronoi_frames(
            blg,
            clip=poly,
            shrink=shrink,
            segment=segment,
            return_input=False,
            as_gdf=True,
        )
        tess[enclosure_id] = ix
        return tess

    return gpd.GeoDataFrame(
        {enclosure_id: ix},
        geometry=[poly],
        index=[-1],
        crs=blg.crs,
    )

In [None]:
105660

In [70]:
r = _tess(*tuples[452], threshold, shrink, segment, enclosure_id)
r.shape

(15, 2)


(14, 2)

In [78]:
# # m = r.explore()
# m = tuples[452][2].loc[[105660]].reset_index().explore( color='r')
# m

In [5]:
%%time
gabriel = Graph.build_triangulation(
    buildings.representative_point(), "gabriel", kernel="identity"
)
max_dist = gabriel.aggregate("max")
buffer = np.clip(max_dist / 2 + max_dist * 0.1, min_buffer, max_buffer).values

CPU times: user 17.5 s, sys: 795 ms, total: 18.2 s
Wall time: 18.2 s


In [6]:
%%time
buffered_buildings = buildings.buffer(buffer, resolution=2).union_all()

CPU times: user 1min 33s, sys: 289 ms, total: 1min 33s
Wall time: 1min 33s


In [7]:
%%time
enclosures = mm.enclosures(streets, limit=buffered_buildings)

CPU times: user 4.58 s, sys: 19.8 ms, total: 4.6 s
Wall time: 4.59 s


In [8]:
%%time
tesselations = mm.enclosed_tessellation(
    buildings.geometry, enclosures.geometry, n_jobs=-1
)

CPU times: user 1min 15s, sys: 3.07 s, total: 1min 18s
Wall time: 2min 15s


In [9]:
buildings.shape, (tesselations.index.values >= 0).sum()

((542350, 2), 543453)

In [10]:
problem_buildings = buildings[
    ~np.isin(buildings.index.values, tesselations.index.values)
]
problem_buildings.shape

(0, 2)

In [55]:
t2 = tesselations.dissolve(by=tesselations.index.values)

In [62]:
t2 = t2.explode()

In [70]:
t2 = t2.sort_index()

In [71]:
dup_indxs = t2.index.duplicated(keep=False)
duplicates = t2[dup_indxs].sort_index()
dup_building_indxs = duplicates.index.unique()

In [102]:
intersection_area = duplicates.intersection(
    buildings.loc[dup_building_indxs], align=True
).area

In [111]:
## sort by buildingid and area
intersection_area = (
    intersection_area.reset_index().sort_values(["index", 0]).set_index("index")
)

In [141]:
to_keep = intersection_area.index.duplicated(keep="first")

In [138]:
to_keep = np.where(dup_indxs)[0][to_keep]

In [140]:
dup_indxs[to_keep] = False

In [144]:
# m = duplicates[to_keep].explore()
# m = buildings.loc[dup_building_indxs].explore(m=m, color='r')
# m

In [None]:
# m = buildings.loc[[715, 717]].explore()
# # m = tesselations.loc[[717]].explore(m=m, color='r')
# m = enclosures.iloc[[36344]].explore(m=m, color='red')
# m = enclosures.iloc[[29810]].explore(m=m, color='green')
# m

In [None]:
intersection_area[to_keep]

In [59]:
# m = duplicates.loc[[714]].explore()
# m = buildings.loc[[714]].explore(m=m, color='r')
# m

In [31]:
duplicates["geometry"]

714       11754.783100
714       11754.783100
714       11754.783100
797          92.486634
797          92.486634
              ...     
540876      469.636678
541193     3256.125494
541193     3256.125494
542300      501.820042
542300      501.820042
Length: 1890, dtype: float64

In [11]:
t2 = tesselations[~tesselations.index.duplicated()].sort_index()

In [68]:
# buildings_graph.adjacency.loc[problem_buildings.index]

In [12]:
%%time
queen_1 = Graph.build_contiguity(t2, rook=False, strict=True).assign_self_weight()

CPU times: user 7min 7s, sys: 113 ms, total: 7min 8s
Wall time: 7min 8s


In [None]:
%%time
queen_3 = graph.higher_order(k=3, lower=True).assign_self_weight()

In [6]:
# bgraph = Graph.from_adjacency(queen_1.adjacency[queen_1.adjacency.index.get_level_values(0) >= 0 ].to_frame().reset_index())
# bgraph3 = Graph.from_adjacency(queen_3.adjacency[queen_3.adjacency.index.get_level_values(0) >= 0 ].to_frame().reset_index())