In [2]:
import glob

import geopandas as gpd
import momepy as mm
import numpy as np
from libpysal.graph import Graph
from shapely import coverage_simplify

regions_datadir = "/data/uscuni-ulce/"
data_dir = "/data/uscuni-ulce/processed_data/"
eubucco_files = glob.glob(regions_datadir + "eubucco_raw/*")
graph_dir = data_dir + "neigh_graphs/"

In [3]:
def generate_enclosures(buildings, streets):
    ## generate additional_boundaries
    min_buffer: float = 0
    max_buffer: float = 100

    gabriel = Graph.build_triangulation(
        buildings.representative_point(), "gabriel", kernel="identity"
    )
    max_dist = gabriel.aggregate("max")
    buffer = np.clip(max_dist / 2 + max_dist * 0.1, min_buffer, max_buffer).values
    # buffer = 100
    buffered_buildings = buildings.buffer(buffer, resolution=2).union_all()

    enclosures = mm.enclosures(streets, limit=buffered_buildings)
    return enclosures


def generate_tess(buildings, enclosures, n_workers=1):
    tesselations = mm.enclosed_tessellation(
        buildings, enclosures.geometry, n_jobs=n_workers
    )

    # deal with split buildings
    tesselations = tesselations.dissolve(by=tesselations.index.values)

    # drop empty spaces with no buildings and a positive index,
    # leave negatives in the geodataframe
    tesselations = tesselations.explode()
    inp, res = buildings.geometry.centroid.sindex.query(tesselations.geometry)
    to_keep = np.append(np.unique(inp), np.where(tesselations.index.values < 0)[0])
    tesselations = tesselations.iloc[to_keep]

    ### drop any remaining duplicates
    ## sometimes -1s have multiple tesselation cells
    tesselations = tesselations[~tesselations.index.duplicated()].sort_index()

    # simplification will be moved to momepy in the end
    simplified_tessellation = coverage_simplify(tesselations.geometry, 1)
    tesselations = gpd.GeoDataFrame(
        {
            "geometry": simplified_tessellation.geoms,
            "eID": tesselations["enclosure_index"],
        },
        index=tesselations.index,
        crs=tesselations.crs,
    )
    return tesselations

In [4]:
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [5]:
# 12199 - hills, small test
# 69300 - prague medium
# 226 - germany somewhere, largest cluster

for region_id, region_hull in region_hulls.iterrows():
    if region_id < 106928:
        continue

    break

In [6]:
buildings = gpd.read_parquet(data_dir + f"/buildings/buildings_{region_id}.parquet")
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region_id}.parquet")

In [7]:
enclosures = generate_enclosures(buildings, streets)

In [8]:
%%time
tessellation = generate_tess(buildings, enclosures, n_workers=-1)

CPU times: user 10.3 s, sys: 418 ms, total: 10.7 s
Wall time: 14.9 s


In [9]:
k = 5
n_splits = 10

In [10]:
buildings_q1 = Graph.build_contiguity(buildings, rook=False).assign_self_weight()

In [11]:
blo_q1 = Graph.build_contiguity(
    enclosures, rook=False, strict=True
).assign_self_weight()
blo_q5 = blo_q1.higher_order(k=k, lower_order=True, diagonal=True)

In [12]:
# m = enclosures.explore()
# m = blo_q1.explore(enclosures, m=m)
# m

In [13]:
street_q1 = Graph.build_contiguity(
    streets, rook=False, strict=True
).assign_self_weight()
street_q5 = street_q1.higher_order(k=k, lower_order=True, diagonal=True)

In [14]:
# m = streets.loc[street_q5[0].index.values].explore(color='red')
# m = streets.loc[[0]].explore(m=m)
# m

In [19]:
sorted_streets = streets.sort_values("geometry").index.values
partial_higher = next(
    lazy_higher_order(street_q1, k=k, n_splits=n_splits, iteration_order=sorted_streets)
)
street_focals = np.setdiff1d(partial_higher.unique_ids, partial_higher.isolates)
street_neighbours = partial_higher.isolates
street_all = partial_higher.unique_ids

In [23]:
# m = streets.loc[street_neighbours].explore(color='red')
# m = streets.loc[street_focals].explore(m=m)
# m = region_hulls.loc[[region_id]].explore(m=m, color='r')
# m

In [104]:
# ns = np.unique(np.concatenate([street_q5[f].index.values for f in street_focals]))
# m = streets.loc[ns].explore(color='red')
# m = streets.loc[street_focals].explore(m=m)
# m

In [106]:
# streets.explore()

In [33]:
# m = enclosures.dissolve(labels).loc[[6]].reset_index().explore()
# m = enclosures.loc[[1082]].reset_index().explore(color='r')
# m

In [34]:
# enclosures.loc[blo_q1[2587].index.values].explore()

## Setup with enclosure graph

In [68]:
sorted_enclosures = enclosures.sort_values(by="geometry").index.values

In [18]:
from utils import lazy_higher_order

In [80]:
partial_higher = next(
    lazy_higher_order(blo_q1, k=k, n_splits=n_splits, iteration_order=sorted_enclosures)
)

In [81]:
partial_higher = next(
    lazy_higher_order(blo_q1, k=k, n_splits=n_splits, iteration_order=sorted_enclosures)
)

enclosure_focals = np.setdiff1d(partial_higher.unique_ids, partial_higher.isolates)
enclosure_neighbours = partial_higher.isolates
enclosure_all = partial_higher.unique_ids

In [82]:
inp, res = buildings.sindex.query(enclosures.geometry, predicate="intersects")

In [83]:
focal_geometries = enclosures.loc[enclosure_focals]

In [85]:
# focal_geometries.explore()

In [38]:
neighbours_indices = blo_q5[0].index

In [39]:
neighbours_geometry = enclosures.loc[neighbours_indices]

In [40]:
neighbours_buildings_idxs = np.unique(res[np.isin(inp, neighbours_indices)])

In [41]:
focal_buildings_idxs = np.unique(res[np.isin(inp, focal)])

## Generate elements locally

In [42]:
neighbours_buildings = buildings.iloc[neighbours_buildings_idxs]

In [43]:
_, interecting_streets = streets.sindex.query(neighbours_geometry.geometry)
neighbours_streets = streets.iloc[np.unique(interecting_streets)]

In [44]:
interecting_streets = streets.sindex.query(focal_geometry.geometry)
focal_streets = streets.iloc[np.unique(interecting_streets)]

In [45]:
# neighbours_geometry.geometry.buffer(100, resolution=2).explore()

In [46]:
neighbours_enclosures = generate_enclosures(neighbours_buildings, neighbours_streets)

In [52]:
# m = neighbours_enclosures.explore()
# m = enclosures.explore(m=m, color='r')
# m

In [48]:
# m = neighbours_enclosures.explore()
# m = enclosures.loc[neighbours_indices].explore(m=m, color='r')
# m

In [53]:
neighbours_tesselations = generate_tess(buildings, neighbours_enclosures)

In [54]:
# neighbours_tesselations.explore()

In [56]:
# m = neighbours_tesselations[neighbours_tesselations.index >= 0].reset_index().explore()
# m = tessellation.loc[tessellation.index.isin(neighbours_buildings_idxs) & (tessellation.index >= 0)].reset_index().explore(m=m, color='r')
# m = neighbours_buildings.explore(m=m, color='green')
# m = neighbours_tesselations.loc[[18402]].explore(m=m, color='red')
# m

In [57]:
from pandas.testing import assert_frame_equal

## test farthest reaching building char

In [62]:
queen_1 = Graph.build_contiguity(tessellation, rook=False, strict=True)
neigh_queen_1 = Graph.build_contiguity(neighbours_tesselations, rook=False, strict=True)

In [63]:
queen_1.adjacency.shape, neigh_queen_1.adjacency.shape

((182119,), (6159,))

In [64]:
different_ids = []
for i in focal_buildings_idxs:
    if sorted(queen_1[i].index) != sorted(neigh_queen_1[i].index):
        different_ids.append(i)

In [65]:
queen_1[different_ids[0]].index

Index([18401, 18405, 18427, 18428], dtype='int64', name='neighbor')

In [66]:
neigh_queen_1[different_ids[0]].index

Index([18401, 18427, 18428], dtype='int64', name='neighbor')

In [67]:
bgraph = queen_1.subgraph(buildings.index.values)
bgraph3 = bgraph.higher_order(k=3, lower_order=True).assign_self_weight()
vals = mm.mean_interbuilding_distance(buildings, bgraph, bgraph3)

KeyError: '[15748 25219] not in index'

In [34]:
neigh_bgraph = neigh_queen_1.subgraph(neighbours_buildings.index.values)
neigh_bgraph3 = neigh_bgraph.higher_order(k=3, lower_order=True).assign_self_weight()
neigh_vals = mm.mean_interbuilding_distance(
    neighbours_buildings, neigh_bgraph, neigh_bgraph3
)

In [35]:
neigh_vals = neigh_vals.loc[focal_buildings_idxs]
vals = vals.loc[focal_buildings_idxs]

In [36]:
from pandas.testing import assert_series_equal

assert_series_equal(
    neigh_vals.loc[focal_buildings_idxs],
    vals.loc[focal_buildings_idxs],
    check_exact=False,
)

### test fartherst reaching node chars

In [37]:
graph = mm.gdf_to_nx(streets)
graph = mm.node_degree(graph)
graph = mm.subgraph(
    graph,
    radius=5,
    meshedness=True,
    cds_length=False,
    mode="sum",
    degree="degree",
    length="mm_len",
    mean_node_degree=False,
    proportion={0: True, 3: True, 4: True},
    cyclomatic=False,
    edge_node_ratio=False,
    gamma=False,
    local_closeness=True,
    closeness_weight="mm_len",
    verbose=False,
)
graph = mm.cds_length(graph, radius=3, name="ldsCDL", verbose=False)
graph = mm.clustering(graph, name="xcnSCl")
graph = mm.mean_node_dist(graph, name="mtdMDi", verbose=False)
nodes, edges, w = mm.nx_to_gdf(graph, spatial_weights=True)
nodes_w5 = Graph.from_W(w).higher_order(k=5, lower_order=True).assign_self_weight()
nodes["lddNDe"] = mm.node_density(nodes, edges, nodes_w5)
nodes["linWID"] = mm.node_density(
    nodes,
    edges,
    nodes_w5,
    weighted=True,
)
nodes_old = nodes

 There are 2256 disconnected components.


In [38]:
graph = mm.gdf_to_nx(neighbours_streets)
graph = mm.node_degree(graph)
graph = mm.subgraph(
    graph,
    radius=5,
    meshedness=True,
    cds_length=False,
    mode="sum",
    degree="degree",
    length="mm_len",
    mean_node_degree=False,
    proportion={0: True, 3: True, 4: True},
    cyclomatic=False,
    edge_node_ratio=False,
    gamma=False,
    local_closeness=True,
    closeness_weight="mm_len",
    verbose=False,
)
graph = mm.cds_length(graph, radius=3, name="ldsCDL", verbose=False)
graph = mm.clustering(graph, name="xcnSCl")
graph = mm.mean_node_dist(graph, name="mtdMDi", verbose=False)
nodes, edges, w = mm.nx_to_gdf(graph, spatial_weights=True)
nodes_w5 = Graph.from_W(w).higher_order(k=5, lower_order=True).assign_self_weight()
nodes["lddNDe"] = mm.node_density(nodes, edges, nodes_w5)
nodes["linWID"] = mm.node_density(
    nodes,
    edges,
    nodes_w5,
    weighted=True,
)
nodes_new = nodes

 There are 77 disconnected components.


In [39]:
n1 = nodes_old.iloc[nodes_old.sindex.query(focal_geometry.geometry)]
n1 = n1.sort_values(["x", "y"]).reset_index(drop=True)
n2 = nodes_new.iloc[nodes_new.sindex.query(focal_geometry.geometry)]
n2 = n2.sort_values(["x", "y"]).reset_index(drop=True)

In [40]:
assert_frame_equal(
    n1.loc[:, ~n1.columns.isin(["local_closeness", "geometry", "nodeID"])],
    n2.loc[:, ~n2.columns.isin(["local_closeness", "geometry", "nodeID"])],
)

In [41]:
# n1 = nodes_old.iloc[nodes_old.sindex.query(focal_geometry.geometry)]
# n1['geometry'] = n1.buffer(10)

# n2 = nodes_new.iloc[nodes_new.sindex.query(focal_geometry.geometry)]
# n2['geometry'] = n2.buffer(5)

# m = n1.explore()
# m = n2.explore(m=m, color='r')
# m

### test fartherst reaching enclosure chars

In [42]:
beid = buildings.merge(tessellation["eID"], left_index=True, right_index=True)["eID"]
beid = neighbours_buildings.merge(
    neighbours_tesselations["eID"], left_index=True, right_index=True
)["eID"]

In [43]:
old_vals = mm.count(enclosures, buildings, beid, weighted=False)
new_vals = mm.count(neighbours_enclosures, neighbours_buildings, beid, weighted=False)
assert old_vals[focal] == new_vals[focal]

In [44]:
old_vals = mm.count(enclosures, buildings, beid, weighted=True)
new_vals = mm.count(neighbours_enclosures, neighbours_buildings, beid, weighted=True)
assert old_vals[focal] == new_vals[focal]

AssertionError: 

### test fartherst reaching edges chars

In [45]:
graph = mm.gdf_to_nx(streets)
nodes, edges_old = mm.nx_to_gdf(graph, spatial_weights=False)
edges_w3_old = (
    Graph.build_contiguity(edges_old, rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)

tess = tessellation[tessellation.index > -1]
tess_nid = mm.get_network_id(tess, edges_old, network_id=edges_old.index, verbose=False)
res_old = mm.describe_reached(tess.geometry.area, tess_nid, graph=edges_w3_old)
interecting_streets_old = np.unique(edges_old.sindex.query(focal_geometry.geometry))

  tess_nid = mm.get_network_id(tess, edges_old, network_id=edges_old.index, verbose=False)


In [46]:
# focal_streets_old.explore()

In [47]:
graph = mm.gdf_to_nx(neighbours_streets)
nodes, edges = mm.nx_to_gdf(graph, spatial_weights=False)
tess = neighbours_tesselations[neighbours_tesselations.index > -1]

edges_w3_new = (
    Graph.build_contiguity(edges, rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)

tess_nid = mm.get_network_id(tess, edges, network_id=edges.index, verbose=False)
res_new = mm.describe_reached(tess.geometry.area, tess_nid, graph=edges_w3_new)
interecting_streets = np.unique(edges.sindex.query(focal_geometry.geometry))
focal_streets_new = res_new.iloc[interecting_streets]

  tess_nid = mm.get_network_id(tess, edges, network_id=edges.index, verbose=False)


In [48]:
focal_streets_old = res_old.iloc[interecting_streets_old]
focal_streets_old["osm_id"] = edges_old.iloc[interecting_streets_old]["osm_id"]
focal_streets_old["geometry"] = edges_old.iloc[interecting_streets_old]["geometry"]
focal_streets_old = gpd.GeoDataFrame(focal_streets_old)
focal_streets_old = focal_streets_old.set_index("osm_id")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  focal_streets_old['osm_id'] = edges_old.iloc[interecting_streets_old]['osm_id']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  focal_streets_old['geometry'] = edges_old.iloc[interecting_streets_old]['geometry']


In [49]:
focal_streets_new = res_new.iloc[interecting_streets]
focal_streets_new["osm_id"] = edges.iloc[interecting_streets]["osm_id"]
focal_streets_new["geometry"] = edges.iloc[interecting_streets]["geometry"]
focal_streets_new = gpd.GeoDataFrame(focal_streets_new)
focal_streets_new = focal_streets_new.set_index("osm_id")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  focal_streets_new['osm_id'] = edges.iloc[interecting_streets]['osm_id']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  focal_streets_new['geometry'] = edges.iloc[interecting_streets]['geometry']


In [50]:
r1 = (
    Graph.build_contiguity(edges_old.set_index("osm_id"), rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)

In [51]:
r2 = (
    Graph.build_contiguity(edges.set_index("osm_id"), rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)

In [55]:
effect_size1 = np.concatenate([r1[f].index.values for f in focal_streets_old.index])
effect_size2 = np.concatenate([r2[f].index.values for f in focal_streets_old.index])
np.setdiff1d(effect_size1, effect_size2)

array(['42399700'], dtype=object)

In [56]:
np.where(streets["osm_id"] == "42399700")

(array([1123]),)

In [57]:
# m = neighbours_streets.explore()
# m = neighbours_geometry.explore(m=m, color='r')
# m = streets.iloc[[1123]].explore(m=m, color='red')
# m

### test fartherst reaching tesselation chars

In [58]:
queen_3 = (
    Graph.build_contiguity(tessellation, rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)
neigh_queen_3 = (
    Graph.build_contiguity(neighbours_tesselations, rook=False, strict=True)
    .higher_order(k=3, lower_order=True)
    .assign_self_weight()
)

In [59]:
vals_old = mm.block_counts(tessellation["eID"], queen_3).loc[focal_buildings_idxs]
vals_new = mm.block_counts(neighbours_tesselations["eID"], neigh_queen_3).loc[
    focal_buildings_idxs
]

In [60]:
assert_series_equal(vals_old, vals_new)

In [57]:
# m = tessellation.loc[focal_buildings_idxs].reset_index().explore()
# m = neighbours_tesselations.loc[focal_buildings_idxs].reset_index().explore(m=m, color='r')
# m

### Hilbert stuff

In [122]:
sorted_enclosures = enclosures.sort_values(by="geometry")

In [123]:
for i in enclosures.index:
    focal = np.where(sorted_enclosures.index == i)[0][0]
    neighbours = sorted_enclosures.iloc[focal - 100 : focal + 1000,].index
    assert np.isin(blo_q1[i].index, neighbours).all()

AssertionError: 

In [145]:
# m = sorted_enclosures.iloc[focal - 100: focal + 100, ].explore()
# m = sorted_enclosures.iloc[[focal], ].explore(m=m, color='red')
# m = enclosures.loc[blo_q1[i].index].explore(color='r', m=m)
# m

### numba bsf

In [514]:
from numba.typed import List

In [515]:
aggregations = (
    blo_q1.adjacency.index.to_frame()
    .reset_index(drop=True)
    .groupby("focal")["neighbor"]
    .unique()
)
numba_graph = list(aggregations)

In [516]:
@numba.jit()
def numba_process_subgraph(start, graph, weights, visited, max_limit=8000):
    stack = [start]
    current_weight = weights[start]
    current_list = [start]
    current_list.pop()
    lists = [current_list]
    lists.pop()

    while len(stack):
        current = stack.pop()
        # the index has not been processed and there is space in the current list
        if (current not in visited) and (
            current_weight + weights[current] <= max_limit
        ):
            visited.add(current)
            current_weight += weights[current]
            current_list.append(current)

            for v in graph[current]:
                if v not in visited:
                    stack.append(v)

        # the index has not been processed, but the current list has reached max capacity
        elif current not in visited:
            # reset everything
            lists.append(current_list)
            current_weight = 0
            current_list = [start]
            current_list.pop()
            stack = [current]  ### reinsert into stack

        # if the current index has been processed continue
        else:
            continue

    # # ### the graph disconnects
    lists.append(current_list)
    return lists, visited

In [517]:
@numba.jit
def group_geoms(ids, numba_graph, weights):
    groups = List([[0, 1, 2]])
    groups.pop()
    visited = set([ids.max() + 1])
    for s in ids:
        sub_list, visited = numba_process_subgraph(s, numba_graph, weights, visited)
        for l in sub_list:
            groups.append(l)
    return groups

In [521]:
groups = group_geoms(enclosures.index.values, numba_graph, weights.values)