In [16]:
import datetime
import gc
import glob

import geopandas as gpd
import libpysal
import momepy as mm
import numpy as np
import pandas as pd
import shapely
from libpysal.graph import Graph
from shapely import coverage_simplify

In [17]:
# Root directory of the regional datasets; other cells append sub-folder names to it.
regions_datadir = "/data/uscuni-ulce/"

In [18]:
# Paths of every parquet file in the "tesselations/" folder under the data root.
tesselation_files = glob.glob(regions_datadir + "tesselations/*.parquet")

In [None]:
# Region hull geometries, iterated region by region further down.
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

# Table linking every building "id" to its "region".
building_region_mapping = pd.read_parquet(
    regions_datadir + "regions/" + "id_to_region.parquet", engine="pyarrow"
)

# Positional code (0 .. n-1) for each building id, keyed by the id itself.
typed_dict = pd.Series(
    np.arange(len(building_region_mapping)),
    index=building_region_mapping["id"].values,
)

# Unique building ids grouped per region.
region_ids = building_region_mapping.groupby("region")["id"].unique()

# The raw mapping is large (the author's note says "2/3 gb"), so drop it once
# the two lookups above have been derived.
del building_region_mapping

In [19]:
def process_tessellation_graph(region_id):
    """Build and store the contiguity graphs of one region's tessellation.

    The region's tessellation is read from ``regions_datadir``, simplified with
    ``coverage_simplify`` (tolerance ``1``) and used to build a contiguity graph
    with ``rook=False``. That graph and its ``higher_order(k=3,
    lower_order=True)`` expansion are written under
    ``regions_datadir + "neigh_graphs/"`` as
    ``tessellation_graph_{region_id}_knn1.parquet`` and
    ``tessellation_graph_{region_id}_knn3.parquet``.

    Parameters
    ----------
    region_id
        Identifier used to locate ``tesselations/tesselation_{region_id}.parquet``
        and to name the output files.

    Returns
    -------
    None
        Results are written to disk; progress is printed.

    Notes
    -----
    Reads the notebook-level names ``regions_datadir``, ``typed_dict``,
    ``region_ids`` and ``region_hull``. ``read_data`` is not defined in this
    part of the notebook and is assumed to be provided elsewhere (TODO confirm).
    """
    ## tessellation graphs
    tessellation = gpd.read_parquet(
        regions_datadir + f"tesselations/tesselation_{region_id}.parquet"
    )

    # Capture index and CRS now: the frame is deleted below to free memory, and
    # the rebuilt frame must be labelled with *this* region's index/CRS rather
    # than with whatever similarly named global happens to exist in the kernel.
    tessellation_index = tessellation.index
    tessellation_crs = tessellation.crs

    simplified_tessellation = coverage_simplify(
        tessellation.geometry, 1
    )  # wont be needed in the end...

    del tessellation
    gc.collect()

    # NOTE(review): `.geoms` assumes coverage_simplify hands back a single
    # geometry collection for this input; confirm against the installed shapely.
    inputdf = gpd.GeoDataFrame(
        {"geometry": simplified_tessellation.geoms},
        index=tessellation_index,
        crs=tessellation_crs,
    )

    del simplified_tessellation
    gc.collect()
    print("Simplified", region_id)

    # First-order contiguity between tessellation cells.
    graph = Graph.build_contiguity(inputdf, rook=False)
    graph.to_parquet(
        regions_datadir
        + "neigh_graphs/"
        + f"tessellation_graph_{region_id}_knn1.parquet"
    )
    print("Build graph knn=1", region_id)

    gc.collect()

    # Neighbours up to order 3, lower orders included.
    graph3 = graph.higher_order(k=3, lower_order=True)
    graph3.to_parquet(
        regions_datadir
        + "neigh_graphs/"
        + f"tessellation_graph_{region_id}_knn3.parquet"
    )
    print("Build graph knn=3", region_id)

    del graph
    del graph3
    gc.collect()

    # The loaded buildings and streets are not used further in this function yet.
    buildings, streets = read_data(typed_dict, region_ids, region_hull, region_id)


# Run the tessellation-graph step region by region.
for region_id, hull_row in region_hulls.iterrows():
    print(datetime.datetime.now(), "----Processing tesselation------", region_id)

    # `region_hull` is kept as a notebook-level name because
    # process_tessellation_graph reads it as a global.
    region_hull = hull_row["convex_hull"]

    process_tessellation_graph(region_id)

    # Only the first region is processed.
    break

In [23]:
# Wrap the simplified geometries in a GeoDataFrame that reuses the index and CRS
# of the loaded tessellation.
# NOTE(review): `simplified_tessellation` and `tesselation` are only assigned in
# cells further down this notebook, so this cell appears to rely on execution
# order — confirm it survives Restart & Run All.
inputdf = gpd.GeoDataFrame(
    {"geometry": simplified_tessellation.geoms},
    index=tesselation.index,
    crs=tesselation.crs,
)

In [24]:
%%time
# Contiguity graph of the simplified cells in `inputdf`, built with rook=False.
graph = Graph.build_contiguity(inputdf, rook=False)

CPU times: user 34.4 s, sys: 676 ms, total: 35.1 s
Wall time: 35.1 s


In [None]:
# Persist the contiguity graph under the graph file name. The previous target,
# "data/tessellation_226_simplified.parquet", is the file the simplified
# geometries are written to with `res.to_parquet`, so the two artefacts would
# overwrite each other.
graph.to_parquet("data/tess_contiguity.parquet")

In [64]:
# Load the tessellation of region 226 (absolute path; the directory matches
# the value assigned to `regions_datadir`).
tesselation = gpd.read_parquet("/data/uscuni-ulce/tesselations/tesselation_226.parquet")

In [65]:
# tesselation[tesselation.area > 50_000].reset_index().explore()

In [68]:
%%time
# Simplify all tessellation geometries together with tolerance 1; the recorded
# run of this cell took about 17 minutes of wall time.
res = coverage_simplify(tesselation.geometry, 1)

CPU times: user 16min 55s, sys: 16.9 s, total: 17min 12s
Wall time: 17min 12s


In [72]:
# Wrap the simplified geometries in a GeoDataFrame carrying the original index and CRS.
# NOTE(review): `res` is rebound from the coverage_simplify result to a
# GeoDataFrame, so re-running this cell on its own would read `.geoms` from the
# GeoDataFrame instead — confirm before re-executing.
res = gpd.GeoDataFrame(
    {"geometry": res.geoms}, index=tesselation.index, crs=tesselation.crs
)

In [75]:
# Shape of the coordinate array of the simplified geometries.
shapely.get_coordinates(res).shape

(91535874, 2)

In [77]:
# Shape of the coordinate array of the original (unsimplified) geometries.
shapely.get_coordinates(tesselation.geometry).shape

(833311885, 2)

In [78]:
# Coordinate count after simplification divided by the count before it (the two
# numbers are the first dimensions printed by the two shape cells).
91535874 / 833311885

0.10984587601315683

In [63]:
# m = tesselation.iloc[:1].explore()
# m = res.iloc[:1].explore(m=m, color='r')
# m

In [None]:
# Simplify the loaded tessellation and rebuild a GeoDataFrame with the same
# index and CRS. The input is read from `tesselation`, the name the frame is
# loaded under in this notebook; the result is bound to `tessellation`.
simplified_tessellation = coverage_simplify(
    tesselation.geometry, 1
)  # wont be needed in the end...
tessellation = gpd.GeoDataFrame(
    {"geometry": simplified_tessellation.geoms},
    index=tesselation.index,
    crs=tesselation.crs,
)

In [79]:
# Store the simplified tessellation of region 226 (path relative to the working directory).
res.to_parquet("data/tessellation_226_simplified.parquet")

In [81]:
# Contiguity graph of the simplified tessellation `res`, built with rook=False.
graph = Graph.build_contiguity(res, rook=False)

CPU times: user 3min 27s, sys: 4.38 s, total: 3min 32s
Wall time: 3min 31s


In [82]:
# Store the contiguity graph so it can be reloaded without rebuilding it.
graph.to_parquet("data/tess_contiguity.parquet")

In [None]:
from libpysal.graph import read_parquet

In [20]:
# Reload the stored contiguity graph with libpysal.graph.read_parquet.
graph = read_parquet("data/tess_contiguity.parquet")

In [21]:
%%time
# Expand the contiguity graph to order 3, keeping lower orders (lower_order=True);
# the recorded run took about 11 minutes of wall time.
graph3 = graph.higher_order(k=3, lower_order=True)

CPU times: user 10min 34s, sys: 46.6 s, total: 11min 21s
Wall time: 11min 19s


In [22]:
# Load the region hulls; the same file is also read into `region_hulls` by an
# earlier cell of this notebook.
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [26]:
# Convex hull of region 226, buffered by 100 (presumably CRS units — confirm),
# used as the mask for selecting streets.
read_mask = region_hulls.loc[226, "convex_hull"].buffer(100)

In [27]:
# Load the full street dataset; it is filtered to the region mask afterwards.
streets = gpd.read_parquet(
    regions_datadir + "streets/central_europe_streets_eubucco_crs.parquet"
)

In [28]:
%%time
# Keep only streets intersecting the buffered hull; `streets` is rebound, so the
# unfiltered frame is no longer available after this cell.
streets = streets[streets.intersects(read_mask)]

CPU times: user 52.4 s, sys: 160 ms, total: 52.6 s
Wall time: 52.5 s


In [32]:
%%time
# Convert the street GeoDataFrame into a graph with momepy.
# NOTE(review): this rebinds `graph`, which earlier cells use for the
# tessellation contiguity graph — a distinct name would avoid the clash.
graph = mm.gdf_to_nx(streets)

CPU times: user 27.8 s, sys: 388 ms, total: 28.2 s
Wall time: 28 s


In [33]:
%%time
# Compute node degree on the street graph with momepy.
graph = mm.node_degree(graph)

CPU times: user 10.8 s, sys: 180 ms, total: 11 s
Wall time: 11 s


In [34]:
%%time
# Subgraph-based characters within radius 5 of each node. Enabled here:
# meshedness, node-degree proportions for degrees 0, 3 and 4, and local
# closeness weighted by "mm_len"; the remaining options are switched off.
graph = mm.subgraph(
    graph,
    radius=5,
    meshedness=True,
    cds_length=False,
    mode="sum",
    degree="degree",
    length="mm_len",
    mean_node_degree=False,
    proportion={0: True, 3: True, 4: True},
    cyclomatic=False,
    edge_node_ratio=False,
    gamma=False,
    local_closeness=True,
    closeness_weight="mm_len",
    verbose=False,
)

CPU times: user 3min 46s, sys: 225 ms, total: 3min 47s
Wall time: 3min 46s


In [35]:
%%time
# Cul-de-sac length with radius 3 via momepy, named "ldsCDL".
graph = mm.cds_length(graph, radius=3, name="ldsCDL", verbose=False)

CPU times: user 2min 1s, sys: 260 ms, total: 2min 2s
Wall time: 2min 1s


In [36]:
%%time
# Clustering measure on the street graph via momepy, named "xcnSCl".
graph = mm.clustering(graph, name="xcnSCl")

CPU times: user 13.9 s, sys: 39.6 ms, total: 14 s
Wall time: 14 s


In [37]:
%%time
# Mean node distance on the street graph via momepy, named "mtdMDi".
graph = mm.mean_node_dist(graph, name="mtdMDi", verbose=False)

CPU times: user 19.9 s, sys: 344 ms, total: 20.3 s
Wall time: 20.3 s


In [38]:
%%time
# Convert the street graph back to tabular form; with spatial_weights=True the
# call is unpacked into nodes, edges and a third object bound to `sw`.
nodes, edges, sw = mm.nx_to_gdf(graph, spatial_weights=True)

 There are 600063 disconnected components.


CPU times: user 26.9 s, sys: 12.4 ms, total: 26.9 s
Wall time: 26.9 s


In [None]:
## tess 1, 3
## buildings 1
## edges 1 3
## enclosures 1
## nodes 1, 5 - from nx.graph


##context 10... numba maybe...

In [None]:
# Shape characters of the enclosures. momepy is imported in this notebook as
# `mm`, so the calls go through that alias.
# NOTE(review): `enclosures` is not created anywhere in the visible notebook —
# confirm where it comes from before running this cell.
enclosures["ldeAre"] = mm.Area(enclosures).series
enclosures["ldePer"] = mm.Perimeter(enclosures).series
enclosures["lseCCo"] = mm.CircularCompactness(enclosures, "ldeAre").series
enclosures["lseERI"] = mm.EquivalentRectangularIndex(
    enclosures, "ldeAre", "ldePer"
).series
enclosures["lseCWA"] = mm.CompactnessWeightedAxis(
    enclosures, "ldeAre", "ldePer"
).series
enclosures["lteOri"] = mm.Orientation(enclosures).series

# Queen contiguity weights between enclosures, keyed by "eID".
blo_q1 = libpysal.weights.contiguity.Queen.from_dataframe(enclosures, ids="eID")


### thats mm.node_density...
# Count, per enclosure, how many enclosures it intersects. Only the row of
# input positions is kept, so the notebook-level `res` is no longer overwritten.
# `sindex.query` replaces `query_bulk` for array input.
inp = enclosures.sindex.query(enclosures.geometry, predicate="intersects")[0]
indices, counts = np.unique(inp, return_counts=True)
# Subtract one for the self-intersection; assumes every enclosure intersects
# itself so that `counts` lines up with the rows — TODO confirm for empty geometries.
enclosures["neighbors"] = counts - 1
enclosures["lteWNB"] = enclosures["neighbors"] / enclosures["ldePer"]

In [None]:
# Measure weighted cells within enclosure: number of tessellation cells per
# enclosure divided by the enclosure area ("ldeAre").
# NOTE(review): `tess` and `enclosures` are not created in the visible notebook,
# and the division aligns `merged` with `enclosures` by index — confirm both
# share the same index before relying on "lieWCe".
encl_counts = tess.groupby("eID").count()
merged = enclosures[["eID", "ldeAre"]].merge(
    encl_counts[["tessellation"]], how="left", on="eID"
)
enclosures["lieWCe"] = merged["tessellation"] / merged["ldeAre"]

In [None]:
# Block count per tessellation cell, called through the `mm` alias under which
# momepy is imported in this notebook.
# NOTE(review): `tess` and `queen_3` are not created in the visible notebook — confirm.
tess["ltcWRE"] = mm.BlocksCount(tess, "eID", queen_3, "tID").series

In [None]:
# get node id
%time links = momepy.get_network_ratio(tess, edges)
tess[["edgeID_keys2", "edgeID_values2"]] = links
%time tess['nodeID'] = momepy.get_node_id(tess, nodes, edges, node_id='nodeID', edge_keys='edgeID_keys2', edge_values='edgeID_values2')

In [None]:
%%time
# Sum of the "sdcAre" values of the tessellation cells linked to each node via
# "nodeID"; called through `mm`, the alias momepy is imported under here.
nodes["sddAre"] = mm.Reached(
    nodes, tess, "nodeID", "nodeID", mode="sum", values="sdcAre"
).series

In [10]:
# For every tessellation file, print its path and the number of cells whose
# area exceeds 50_000 (in squared CRS units).
for tf in tesselation_files:
    tesselations = gpd.read_parquet(tf)
    tarea = tesselations.area
    print(tf, (tarea > 50_000).sum())

/data/uscuni-ulce/tesselations/tesselation_1554.parquet 3
/data/uscuni-ulce/tesselations/tesselation_13224.parquet 4
/data/uscuni-ulce/tesselations/tesselation_32541.parquet 4
/data/uscuni-ulce/tesselations/tesselation_100115.parquet 81
/data/uscuni-ulce/tesselations/tesselation_120665.parquet 12
/data/uscuni-ulce/tesselations/tesselation_99170.parquet 13
/data/uscuni-ulce/tesselations/tesselation_37812.parquet 9
/data/uscuni-ulce/tesselations/tesselation_104389.parquet 23
/data/uscuni-ulce/tesselations/tesselation_8191.parquet 3
/data/uscuni-ulce/tesselations/tesselation_108009.parquet 6
/data/uscuni-ulce/tesselations/tesselation_42925.parquet 6
/data/uscuni-ulce/tesselations/tesselation_8754.parquet 210
/data/uscuni-ulce/tesselations/tesselation_35468.parquet 3
/data/uscuni-ulce/tesselations/tesselation_113651.parquet 42
/data/uscuni-ulce/tesselations/tesselation_46214.parquet 31
/data/uscuni-ulce/tesselations/tesselation_56119.parquet 10
/data/uscuni-ulce/tesselations/tesselation_52

In [11]:
# tesselations.reset_index().explore()

In [20]:
# Show the kernel's working directory; the relative "data/..." paths in this
# notebook are resolved against it.
!pwd

/home/krasen/notebooks


In [21]:
import geopandas as gpd
from libpysal.graph import Graph

# Load the buildings saved as a problem case and build a Gabriel triangulation
# graph on their centroids with the identity kernel.
buildings = gpd.read_parquet("data/buffer_problem_buildings.parquet")
gabriel = Graph.build_triangulation(buildings.centroid, "gabriel", kernel="identity")