In [1]:
import gc
import glob

import geopandas as gpd
import momepy as mm
import numpy as np
import pandas as pd
import shapely
from libpysal.graph import Graph, read_parquet
import datetime

In [2]:
# Root of the data tree; every location below is derived from it.
regions_datadir = "/data/uscuni-ulce/"
eubucco_files = glob.glob(f"{regions_datadir}eubucco_raw/*")

# One folder per element type under processed_data/.
buildings_dir = f"{regions_datadir}processed_data/buildings/"
streets_dir = f"{regions_datadir}processed_data/streets/"
enclosures_dir = f"{regions_datadir}processed_data/enclosures/"
tessellations_dir = f"{regions_datadir}processed_data/tessellations/"
graph_dir = f"{regions_datadir}processed_data/neigh_graphs/"
chars_dir = f"{regions_datadir}processed_data/chars/"

In [3]:
## to build a new conda env
## conda install -c conda-forge momepy umap-learn fast_hdbscan jupyterlab pyarrow matplotlib lonboard folium mapclassify datashader bokeh holoviews dask
# pip install -e . for shapely, momepy, core, geoplanar, fast_hdbscan

In [4]:
### used for testing
# osm_ids = ['v0.1-DEU.9.8.13.1_1-194', 'v0.1-DEU.9.8.13.1_1-232',
#        'v0.1-DEU.9.8.13.1_1-214', 'v0.1-DEU.9.8.13.1_1-273',
#        'v0.1-DEU.9.8.13.1_1-211', 'v0.1-DEU.9.8.13.1_1-188',
#        'v0.1-DEU.9.8.13.1_1-192', 'v0.1-DEU.9.8.13.1_1-276',
#        'v0.1-DEU.9.8.13.1_1-191', 'v0.1-DEU.9.8.13.1_1-184',
#        'v0.1-DEU.9.8.13.1_1-245', 'v0.1-DEU.9.8.13.1_1-275']

In [5]:
region_name = 69300
region_hulls = gpd.read_parquet(f"{regions_datadir}regions/regions_hull.parquet")

# Look the region row up once and unpack its label and its convex_hull value.
region_row = region_hulls.loc[region_name]
region_id = region_row.name
region_hull = region_row.convex_hull

In [15]:
## freiburg
# Local test dataset: rebinds the region and every directory configured above.
gdf = gpd.read_parquet('../data/freiburg/buildings_freiburg.parquet').to_crs(epsg=3035)
region_id = 'freiburg'
region_name = region_id
region_hull = gdf.union_all().convex_hull
region_hulls = gpd.GeoDataFrame(
    {'geometry': [region_hull]},
    index=[region_id],
    crs=gdf.crs,
)

# All element folders point at the same local directory; chars get a subfolder.
buildings_dir = '../data/freiburg/'
streets_dir = '../data/freiburg/'
enclosures_dir = '../data/freiburg/'
tessellations_dir = '../data/freiburg/'
graph_dir = '../data/freiburg/'
chars_dir = '../data/freiburg/chars/'

## Streets

In [16]:
from core.generate_streets import process_region_streets

In [17]:
## overture is indexed based on 4326
# Select the region as a one-row frame, reproject it, then take the hull of
# its geometry and unwrap the single result.
overture_hull = (
    region_hulls.loc[[region_name]]
    .to_crs(epsg=4326)
    .convex_hull
    .iloc[0]
)

In [18]:
%%time
## processs streets
streets = process_region_streets(overture_hull, region_id)
## save streets
streets.to_parquet(streets_dir + f'streets_{region_id}.parquet')

CPU times: user 1.83 s, sys: 931 ms, total: 2.76 s
Wall time: 2min 24s


  tunnels = streets[streets.road.str.contains('tunnel').fillna(False)].set_crs(epsg=4236).to_crs(epsg=3035)


In [20]:
# streets.explore(tiles='cartodbpositron', prefer_canvas=True)

In [None]:
# streets.plot()

## Buildings


In [None]:
from core.generate_buildings import read_region_buildings, process_region_buildings

In [None]:
## need to link eubucco building polygons to regions, this will change in the future
building_region_mapping = pd.read_parquet(
    f"{regions_datadir}regions/id_to_region.parquet",
    engine="pyarrow",
)

# Positional lookup: building id -> row number in the mapping table.
typed_dict = pd.Series(
    data=np.arange(len(building_region_mapping)),
    index=building_region_mapping["id"].values,
)

# Unique building ids per region.
region_ids = building_region_mapping.groupby("region")["id"].unique()

# Free the mapping table once the lookups are built (it's 2/3 gb).
del building_region_mapping

In [None]:
%%time

buildings = read_region_buildings(
    typed_dict, region_ids, region_hull, region_id
)

buildings = process_region_buildings(buildings, True, simplification_tolerance=.1, merge_limit=25)



In [None]:
# Persist the processed buildings for the downstream stages.
buildings.to_parquet(f"{buildings_dir}buildings_{region_id}.parquet")

## Enclosures & Tessellation

In [21]:
from core.generate_elements import process_region_elements

In [22]:
%%time
enclosures, tesselations = process_region_elements(buildings_dir, streets_dir, region_id)

---- Processing region:  freiburg 2024-08-20 14:14:21.323851
CPU times: user 9.24 s, sys: 618 ms, total: 9.86 s
Wall time: 14.1 s


In [23]:
enclosures.to_parquet(f"{enclosures_dir}enclosure_{region_id}.parquet")
print("Processed enclosures")

## save files
tesselations.to_parquet(f"{tessellations_dir}tessellation_{region_id}.parquet")
print("processed tesselations")

Processed enclosures
processed tesselations


In [22]:
# import lonboard
# layer = lonboard.PolygonLayer.from_geopandas(tesselations, opacity=0.15)
# m = lonboard.Map([layer])
# m

In [11]:
# layer = lonboard.PolygonLayer.from_geopandas(enclosures, opacity=0.15)
# m = lonboard.Map([layer])
# m

## Graphs

In [24]:
from core.generate_ngraphs import process_region_graphs

In [25]:
%%time
process_region_graphs(
    region_id,
    graph_dir,
    buildings_dir,
    streets_dir,
    enclosures_dir,
    tessellations_dir,
)

Built tess graph knn=1
Built buildings graph knn=1
Built streets graph knn=1
Built enclosure graph knn=1
Built nodes graph knn=1
CPU times: user 3.33 s, sys: 55.8 ms, total: 3.39 s
Wall time: 3.38 s


 There are 27 disconnected components.


## Characters

In [26]:
from core.generate_chars import process_single_region_chars, process_street_chars

In [27]:
%%time
process_single_region_chars(
    region_id,
    graph_dir,
    buildings_dir,
    streets_dir,
    enclosures_dir,
    tessellations_dir,
    chars_dir
)

2024-08-20 14:14:45.174960 ----Processing ------ freiburg
Processing streets
Processing enclosures
Processing buildings


  angles = np.arccos(cosine_angle)
  angles = np.arccos(cosine_angle)
  return np.nanmean(np.abs(90 - degrees[true_angles]))
  angles = np.arccos(cosine_angle)
  return Series({"mean": np.nanmean(dists), "std": np.nanstd(dists)})
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Processing tessellation
CPU times: user 54.8 s, sys: 260 ms, total: 55 s
Wall time: 54.9 s


## Generate primary data

In [28]:
# Reload the per-element character tables from chars_dir.
tessellation = gpd.read_parquet(f"{chars_dir}tessellations_chars_{region_id}.parquet")
buildings = gpd.read_parquet(f"{chars_dir}buildings_chars_{region_id}.parquet")
enclosures = gpd.read_parquet(f"{chars_dir}enclosures_chars_{region_id}.parquet")
streets = gpd.read_parquet(f"{chars_dir}streets_chars_{region_id}.parquet")
nodes = gpd.read_parquet(f"{chars_dir}nodes_chars_{region_id}.parquet")

In [29]:

# Identifier and helper columns that are only needed for joining.
join_only_columns = [
    "nID",
    "eID",
    "nodeID",
    "mm_len",
    "cdsbool",
    "node_start",
    "node_end",
    "x",
    "y",
    "enclosure_index",
    # "id",
    # "osm_id",
    "index",  ## maybe keep
]

# Left-join every element table onto the tessellation cells: buildings by
# index, enclosures by enclosure_index/eID, streets by nID, nodes by nodeID.
# The column-based merges reset the index, so the tessellation index is put
# back at the end.
merged = (
    tessellation.drop(columns=["geometry"])
    .merge(
        buildings.drop(columns=["nodeID", "geometry", 'nID']),
        left_index=True,
        right_index=True,
        how="left",
    )
    .merge(
        enclosures.drop(columns="geometry"),
        left_on="enclosure_index",
        right_on="eID",
        how="left",
    )
    .merge(streets.drop(columns="geometry"), on="nID", how="left")
    .merge(nodes.drop(columns="geometry"), on="nodeID", how="left")
    .drop(columns=join_only_columns)
    .set_index(tessellation.index)
)

In [30]:
from core.utils import used_keys

In [31]:
# Keep only the columns listed in used_keys, in that order.
primary_columns = list(used_keys.keys())
primary = merged[primary_columns]
primary.shape

(36181, 63)

In [32]:
# Persist the primary character table next to the per-element chars.
primary.to_parquet(f'{chars_dir}primary_chars_{region_id}.parquet')

----

In [108]:
# Reload the saved street network for the active region.
streets = gpd.read_parquet(f'{streets_dir}streets_{region_id}.parquet')

In [110]:
# Display the street table.
streets

Unnamed: 0,id,geometry,class
0,0821e37fffffffff0477ae0e1bed1e9a,"LINESTRING (4611749.424 3004369.263, 4611780.6...",residential
1,0821e37fffffffff047bbe5677a74480,"LINESTRING (4611714.139 3004739.97, 4611787.46...",residential
2,0821e37fffffffff047bfb8fac33b4c9,"LINESTRING (4611567.93 3004432.278, 4611694.70...",residential
3,0821e37fffffffff047fa869f63e10e7,"LINESTRING (4611467.748 3004340, 4611448.418 3...",secondary
4,0821e37fffffffff047fff5f666d5099,"LINESTRING (4611372.18 3006808.012, 4611455.43...",tertiary
...,...,...,...
59751,08e1e354e35110570477ffeef2f023b4,"LINESTRING (4645687.693 3003560.862, 4645689.7...",tertiary
59752,08e1e354e62e2ac70477ff140f503231,"LINESTRING (4643777.747 3005180.047, 4643778.1...",tertiary
59753,08e1e354e68de297047befce7622274c,"LINESTRING (4642665.336 3005942.733, 4642662.6...",secondary
59754,08e1e354f40a18a7047bffd7edee26e0,"LINESTRING (4639107.972 3002845.339, 4639107.8...",residential


In [112]:
# Check which region is currently active in the kernel.
region_id

69300

In [None]:
# buildings.explore()

In [None]:
# Rebind the active region to 69300 (the freiburg section sets it to 'freiburg').
region_id = 69300

In [None]:
from core.generate_chars import process_enclosure_chars, process_building_chars, process_tessellation_chars, process_street_chars
# Re-run a single character stage for the active region. The region comes from
# region_id instead of a repeated literal so that this cell follows whichever
# region was selected above.
# NOTE(review): process_single_region_chars is called with explicit directory
# arguments earlier in this notebook — confirm these single-argument
# signatures are still current.
# process_street_chars(region_id)
# process_enclosure_chars(region_id)
# process_building_chars(region_id)
process_tessellation_chars(region_id)

In [None]:
# tessellation = gpd.read_parquet(
#         data_dir + f"tessellations/tessellation_{69300}.parquet"
#     )

# # import lonboard
# # # layer = lonboard.PolygonLayer.from_geopandas(tessellation, opacity=0.15)
# # # m = lonboard.Map([layer])
# # # m

In [None]:
from core.generate_streets import read_overture_region_streets, rp

In [None]:
# Take the hull from the GeoDataFrame's geometry (`.convex_hull.iloc[0]`), the
# same way overture_hull is built earlier in the notebook. Reading
# `.convex_hull` off the row instead requires a column literally named
# 'convex_hull', which the freiburg hull frame (geometry column only) lacks.
streets = read_overture_region_streets(
    region_hulls.loc[[region_name]].to_crs(epsg=4326).convex_hull.iloc[0],
    region_id,
)

In [None]:
# Keep a handle on the unfiltered streets; `streets` is rebound by the filter below.
orig_streets = streets

In [None]:
# Road classes kept for the street network.
approved_roads = ['living_street',
                 'motorway',
                 'motorway_link',
                 'pedestrian',
                 'primary',
                 'primary_link',
                 'residential',
                 'secondary',
                 'secondary_link',
                 'tertiary',
                 'tertiary_link',
                 'trunk',
                 'trunk_link',
                 'unclassified']
streets = orig_streets[orig_streets['class'].isin(approved_roads)]
## drop tunnels
# na=False makes rows with a missing `road` value count as "not a tunnel" and
# returns a boolean mask directly. The previous `.fillna(False)` relied on
# fillna downcasting an object column to bool before `~` was applied.
streets = streets[~streets.road.str.contains('is_tunnel', na=False)]
# The raw geometries are tagged as EPSG:4326 and then projected to EPSG:3035.
streets = streets.set_crs(epsg=4326).to_crs(epsg=3035)
# Sort by id for a reproducible order and keep only the needed columns.
streets = streets.sort_values('id')[['id', 'geometry', 'class']].reset_index(drop=True)

In [None]:
# reset_index() moves the index into a regular column of the frame used for plotting.
plotting = streets.reset_index()

In [None]:
import lonboard

In [None]:
# Path layer built from the `plotting` frame, at least one pixel wide.
layer = lonboard.PathLayer.from_geopandas(
    plotting,
    width_min_pixels=1,
)

In [None]:
# Build the map from the layer and display it as the cell output.
m = lonboard.Map(layer)
m

In [None]:
# NOTE(review): hard-coded index labels — presumably two tunnel segments picked
# from the map; they depend on the current ordering of `streets`. Confirm.
tunnel = streets.loc[[41318, 41316]]

In [None]:
# `region` is never defined in this notebook; use the active region_id.
# NOTE(review): this path layout (tessellations/chars_<id>.parquet) differs from
# the tessellations_chars_<id>.parquet files read earlier — confirm which one
# exists on disk.
tess = gpd.read_parquet(f"{chars_dir}tessellations/chars_{region_id}.parquet")

In [None]:
from libpysal.graph import Graph, read_parquet

In [None]:
# Rebinds graph_dir to the shared neigh_graphs folder.
graph_dir = "/data/uscuni-ulce/processed_data/neigh_graphs/"
# `region` is never defined in this notebook; use the active region_id.
graph = read_parquet(f"{graph_dir}tessellation_graph_{region_id}_knn1.parquet")

In [None]:
# Rebuild a fuzzy-contiguity graph from the tessellation for comparison.
graph2 = (
    Graph.build_fuzzy_contiguity(tess, buffer=0.25)
    .assign_self_weight()
)

In [None]:
# Display the graph loaded from disk.
graph

In [None]:
# Display the graph rebuilt from the tessellation.
graph2

In [None]:
# Ids whose neighbour count differs between the stored and the rebuilt graph.
cardinality_differs = graph.cardinalities != graph2.cardinalities
graph.unique_ids[np.where(cardinality_differs)]

In [None]:
# m = tess.loc[graph2[299010].index.values].reset_index().explore()
# m = tess.loc[graph[299010].index.values].reset_index().explore(m=m, color='red')
# m

In [None]:
from core.generate_elements import generate_enclosures
# Root of the processed outputs; note that it already ends with a slash.
data_dir = "/data/uscuni-ulce/processed_data/"


In [None]:
%%time
buildings = gpd.read_parquet(
    data_dir + f"/buildings/buildings_{region}.parquet"
)
streets = gpd.read_parquet(data_dir + f"/streets/streets_{region}.parquet")
enclosures = generate_enclosures(buildings, streets)

In [None]:
import momepy as mm

In [None]:
%%time
buff_buildings = buildings.buffer(105, resolution=2).union_all()

In [None]:
# Rebinds buff_buildings with momepy's adaptive buffered limit, replacing the
# fixed-distance buffer computed in the previous cell.
buff_buildings = mm.buffered_limit(buildings, buffer='adaptive')

In [None]:
# One row per part of the buffered limit.
buffer = gpd.GeoSeries(list(buff_buildings.geoms), crs=buildings.crs)

In [None]:
# Enclosures from the streets, limited to and clipped by the buffered buildings.
enclosures = mm.enclosures(
    streets,
    limit=buff_buildings,
    clip=True,
)

In [None]:
# enclosures.explore()

In [None]:
# NOTE(review): exact repeat of the enclosures call two cells above — candidate for removal.
enclosures = mm.enclosures(streets, limit=buff_buildings, clip=True)


In [None]:
# Interactive help lookup left over from development; not needed for the pipeline.
mm.centroid_corner_distance?

In [None]:
# buffer.reset_index().explore()

In [None]:
# enclosures.iloc[np.unique(res)].explore()

In [None]:
import shapely

In [None]:
# buffer.reset_index().explore()

In [None]:
# Inspect one buffer part by position.
buffer.iloc[403]

In [None]:
# Same part, exterior ring only.
buffer.iloc[403].exterior

In [None]:
# Concave hull of that exterior ring, holes disallowed.
shapely.concave_hull(
    buffer.iloc[403].exterior,
    ratio=0.02,
    allow_holes=False,
)

In [None]:
# Polygonize the exterior rings of the buffer parts and keep the resulting
# geometries as a GeoSeries in the buildings CRS.
buffer_no_holes = gpd.GeoSeries(
    shapely.polygonize(buffer.exterior.values).geoms,
    crs=buildings.crs,
)

In [None]:
# Spatial-index query: pairs of (input position, enclosure position) that intersect.
inp, res = enclosures.sindex.query(
    buffer_no_holes.geometry,
    predicate='intersects',
)

In [None]:
# enclosures.iloc[np.unique(res)].explore()

In [None]:
# Count interior rings on `buffer` itself: the result is used as a mask over
# `buffer` in the following cell, so it must be computed from the same series.
# buffer_no_holes comes out of polygonize and need not align with `buffer`
# row for row.
has_holes = shapely.get_num_interior_rings(buffer.geometry)

In [None]:
# Rows of `buffer` where the has_holes count is positive, then display them.
with_holes = buffer[has_holes > 0]
with_holes

In [None]:
# Same intersects query as before, this time against the original buffer parts.
inp, res = enclosures.sindex.query(
    buffer.geometry,
    predicate='intersects',
)

In [58]:
# gpd.read_file(f).set_crs(epsg=25832).explore()