# Draft of feature matching

Adapted from: https://github.com/anerv/BikeDNA_BIG/blob/main/feature_matching_hpc/scripts/feature_matching.py

In [2]:
# Load libraries, settings and data

# Debug switch. NOTE(review): nothing in the visible part of this notebook reads it.
debug = False

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
import yaml
import pickle

import momepy as mm
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm, colors
import contextily as cx

import utils
import matching_functions as match_func

user settings for feature matching algorithm

In [None]:
# Define feature matching user settings.
# All four values are checked to be int or float before the segments are created.

# Passed to match_func.create_segment_gdf as segment_length.
# The shorter the segments, the longer the matching process will take.
# For cities with a gridded street network with streets as straight lines,
# longer segments will usually work fine.
segment_length = 10
# Passed to match_func.overlay_buffer as dist.
# Presumably in the units of the projected CRS (metres) - TODO confirm.
buffer_dist = 15
# Passed to match_func.find_matches_from_buffer as hausdorff_threshold.
# Presumably in the same length units as buffer_dist - TODO confirm.
hausdorff_threshold = 17
# Passed to match_func.find_matches_from_buffer as angular_threshold.
# Presumably in degrees - TODO confirm.
angular_threshold = 30


user settings for use case (FUA)

In [None]:
# which FUA? (compared against the eFUA_ID column of the sample metadata)
fua = 869

# which h3 resolution? (passed to utils.make_grid)
res = 9

# NOTE(review): base_crs, orig_file and base_file are not referenced in the
# visible part of this notebook; the data is loaded through the utils.read_*
# helpers using only `fua`.
base_crs = "EPSG:4326"

# original OSM data
orig_file = f"../data/{fua}/roads_osm.parquet"

# which file to take as baseline?
base_file = f"../data/{fua}/manual/{fua}.parquet"

read in data

In [None]:
# Load the sample metadata and select the row(s) belonging to the chosen FUA.
meta = utils.read_sample_data()
is_selected_fua = meta.eFUA_ID == fua

# Geometry is kept as a (one-column) selection; the name is taken from the first match.
geom = meta.loc[is_selected_fua, "geometry"]
city = meta.loc[is_selected_fua, "eFUA_name"].values[0]

In [None]:
# Original OSM roads, reduced to the geometry column only
orig = utils.read_parquet_roads(fua)[["geometry"]]

# Projected CRS of the original data; the other datasets are read with it
proj_crs = orig.crs

# Base case: manual simplification
base = utils.read_manual(fua, proj_crs=proj_crs)

# Comparison case: parenx result, "voronoi" variant
comp = utils.read_parenx(fua, "voronoi", proj_crs=proj_crs)

In [None]:
# Base case: graph -> node/edge GeoDataFrames -> node degree column
base_graph = mm.gdf_to_nx(base, length="length", integer_labels=True)
nodes_base, edges_base = mm.nx_to_gdf(base_graph)
nodes_base = utils.add_node_degree(nodes_base, base_graph)

# Comparison case: same three steps
comp_graph = mm.gdf_to_nx(comp, length="length", integer_labels=True)
nodes_comp, edges_comp = mm.nx_to_gdf(comp_graph)
nodes_comp = utils.add_node_degree(nodes_comp, comp_graph)

make h3 grid

In [None]:
# Build the h3 grid for the selected FUA at resolution `res`, in the projected CRS
grid = utils.make_grid(fua, res, proj_crs=proj_crs)
# Show the first rows as a quick visual check
grid.head()

make subfolder for results, and save grid there

In [None]:
# Evaluation results for this FUA go into their own subfolder.
# Create it if it does not exist yet and store the grid there as a GeoPackage.
evaluation_dir = f"../data/{fua}/evaluation/"
os.makedirs(evaluation_dir, exist_ok=True)
grid.to_file(evaluation_dir + "grid.gpkg", index=False)

In [None]:
# # define parameters

# study_area = parsed_yaml_file["study_area"] study_area: 'dk' # provide name of study area
# study_crs = parsed_yaml_file["study_crs"] study_crs: 'EPSG:25832' # The CRS you want to use for the analysis. 
# reference_name = parsed_yaml_file["reference_name"] reference_name: 'GeoDanmark' # provide name of reference dataset

# # define filepaths and read in data

# path = f"../{study_area}"

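Make grid (second, self-contained H3 implementation; TODO: reconcile with the `utils.make_grid` call used above)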

In [None]:
# Create and plot grid

def create_h3_grid(polygon_gdf, hex_resolution, crs, buffer_dist):
    """Create a grid of H3 hexagons covering the buffered study area.

    Inspired by https://stackoverflow.com/questions/51159241/how-to-generate-shapefiles-for-h3-hexagons-in-a-particular-area

    Parameters
    ----------
    polygon_gdf : geopandas.GeoDataFrame or geopandas.GeoSeries
        Polygon(s) delimiting the study area. Must have a CRS set, because
        the buffered geometries are reprojected to EPSG:4326.
    hex_resolution : int
        H3 resolution passed to ``h3.polyfill``.
    crs : any CRS input accepted by ``GeoDataFrame.to_crs``
        CRS of the returned grid.
    buffer_dist : int or float
        Buffer distance applied to ``polygon_gdf`` *before* reprojection,
        i.e. expressed in the units of ``polygon_gdf``'s own CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per hexagon with the columns ``hex_id``, ``geometry`` and
        ``grid_id`` (a copy of ``hex_id``), in ``crs``.

    Notes
    -----
    NOTE(review): ``h3``, ``mapping`` and ``Polygon`` are not imported in the
    visible part of this file and must be in scope for this function to run
    (presumably ``import h3`` and
    ``from shapely.geometry import Polygon, mapping`` - TODO confirm).
    ``polyfill`` and ``h3_to_geo_boundary`` are h3-py v3 names; h3-py v4
    renamed them.
    """

    print(f"Creating hexagons at resolution {hex_resolution}...")

    union_poly = (
        polygon_gdf.buffer(buffer_dist).to_crs("EPSG:4326").geometry.unary_union
    )

    # The union is a single Polygon when the buffered area is contiguous and a
    # multi-part geometry otherwise. A Polygon cannot be iterated, and since
    # Shapely 2.0 multi-part geometries cannot be iterated directly either, so
    # the parts are collected explicitly.
    if union_poly.geom_type == "Polygon":
        polygons = [union_poly]
    else:
        polygons = list(union_poly.geoms)

    # Find the hexagons within the shape boundary using PolyFill
    hex_list = []
    for poly in polygons:
        geojson = mapping(poly)
        # Swap every (x, y) vertex to (y, x) in all rings; presumably needed
        # because h3.polyfill expects (lat, lng) order by default.
        geojson["coordinates"] = [
            [[vertex[1], vertex[0]] for vertex in ring]
            for ring in geojson["coordinates"]
        ]
        hex_list.extend(h3.polyfill(geojson, res=hex_resolution))

    # Create hexagon data frame
    hex_pd = pd.DataFrame(hex_list, columns=["hex_id"])

    # Create hexagon geometry and GeoDataFrame
    hex_pd["geometry"] = [
        Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))
        for hex_id in hex_pd["hex_id"]
    ]

    # Hexagon boundaries are geographic coordinates: declare EPSG:4326 (same
    # spelling as used for the reprojection above), then project to `crs`.
    grid = gpd.GeoDataFrame(hex_pd).set_crs("EPSG:4326").to_crs(crs)

    grid["grid_id"] = grid.hex_id

    return grid



# NOTE(review): set_renderer, renderer_map, eval_func, study_area_poly,
# study_crs, pdict, cx_tile_1, area_name and study_area are not defined in the
# visible part of this notebook.
set_renderer(renderer_map)

# Hex grid at resolution 8 over the study area, buffered by 500
grid = eval_func.create_h3_grid(study_area_poly, 8, study_crs, 500)

# Mean cell area, converted to km2
grid_cell_area = grid.area.mean() / 1_000_000

fig, ax = plt.subplots(1, figsize=pdict["fsmap"])
print(f"The grid contains {len(grid)} hex cells with an average area of {grid_cell_area:.2f} km2.")
print("This grid will be used for local (grid cell level) analysis:")

# Cell outlines only, drawn on top of a basemap
grid.plot(
    ax=ax,
    facecolor="none",
    edgecolor=pdict["base"],
    alpha=pdict["alpha_back"],
    linewidth=0.2,
)
ax.set_axis_off()
cx.add_basemap(ax, crs=study_area_poly.crs, source=cx_tile_1)
ax.set_title(f"{area_name} study area ({len(grid)} grid cells)")

# plot_func.save_fig(fig, osm_results_static_maps_fp + "area_grid_osm")

# Store the grid next to the processed OSM data
osm_processed_fp = f"../../data/OSM/{study_area}/processed/"
osm_grid_fp = osm_processed_fp + "grid.parquet"
grid.to_parquet(osm_grid_fp)

Read in the simplified edges for the reference (ref) and OSM datasets

In [None]:

# Simplified edges of both datasets are read from the same <path>/data folder
edges_dir = path + "/data"
ref_edges_simplified = gpd.read_parquet(edges_dir + "/ref_edges_simplified.parquet")
osm_edges_simplified = gpd.read_parquet(edges_dir + "/osm_edges_simplified.parquet")

Read in the grids for both datasets (reference and OSM) — TODO: confirm whether both are needed

In [None]:

# Grids of both datasets are read from the same <path>/data folder
grid_dir = path + "/data"
ref_grid = gpd.read_parquet(grid_dir + "/ref_grid.parquet")
osm_grid = gpd.read_parquet(grid_dir + "/osm_grid.parquet")

merge grid

In [None]:
# Join the two grids on their index. The reference geometry is dropped so
# that only the OSM geometry column remains; clashing column names get the
# suffixes _osm / _ref.
grid = pd.merge(
    left=osm_grid,
    right=ref_grid.drop(columns="geometry"),
    left_index=True,
    right_index=True,
    suffixes=("_osm", "_ref"),
)

# The index join must neither drop nor duplicate cells
assert len(grid) == len(osm_grid) == len(ref_grid)

# Use the OSM grid id as the common cell identifier
grid["grid_id"] = grid["grid_id_osm"]

Create segments for the OSM and reference edges

In [None]:

# Validate the user settings. The message is passed as a plain string: the
# previous `print(...)` evaluated to None, so the AssertionError carried no text.
for s in [segment_length, buffer_dist, hausdorff_threshold, angular_threshold]:
    assert isinstance(s, (int, float)), "Settings must be integer or float values!"

# NOTE(review): `study_crs`, `path`, `osm_edges_simplified` and
# `ref_edges_simplified` come from other cells; `study_crs` and `path` are not
# defined in the visible part of this notebook.

# Split the OSM edges into segments of the configured length
osm_segments = match_func.create_segment_gdf(
    osm_edges_simplified, segment_length=segment_length
)
# Keep the original OSM id under a new name ...
osm_segments.rename(columns={"osmid": "org_osmid"}, inplace=True)
# ... because the matching function assumes an id column named osmid as the
# unique id for edges
osm_segments["osmid"] = osm_segments["edge_id"]

osm_segments.set_crs(study_crs, inplace=True)
osm_segments.dropna(subset=["geometry"], inplace=True)

# Same for the reference edges; their segment id gets a distinct column name
ref_segments = match_func.create_segment_gdf(
    ref_edges_simplified, segment_length=segment_length
)
ref_segments.set_crs(study_crs, inplace=True)
ref_segments.rename(columns={"seg_id": "seg_id_ref"}, inplace=True)
ref_segments.dropna(subset=["geometry"], inplace=True)

print('Segments created!')

# Store both segment sets; the segment length is part of the file name
osm_segments.to_parquet(path + f"/processed/osm_segments_{segment_length}.parquet")
ref_segments.to_parquet(path + f"/processed/ref_segments_{segment_length}.parquet")


Find buffer matches between the reference and OSM segments

In [None]:
# Overlay the two segment sets using the configured buffer distance;
# segments are identified by seg_id (OSM) and seg_id_ref (reference).
buffer_matches = match_func.overlay_buffer(
    osm_data=osm_segments,
    osm_id_col="seg_id",
    reference_data=ref_segments,
    ref_id_col="seg_id_ref",
    dist=buffer_dist,
)

print('Buffer matches found!')

# All three matching settings are encoded in the result file name
buffer_matches_fp = (
    path
    + f"/results/buffer_matches_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)
buffer_matches.to_parquet(buffer_matches_fp)


Find the final segment matches from the buffer matches

In [None]:
# Reduce the buffer matches to final matches using the angular and
# Hausdorff thresholds
segment_matches = match_func.find_matches_from_buffer(
    buffer_matches=buffer_matches,
    osm_edges=osm_segments,
    reference_data=ref_segments,
    hausdorff_threshold=hausdorff_threshold,
    angular_threshold=angular_threshold,
)

print("Segment matches found!")

# The result is pickled; all three matching settings are encoded in the file name
results_dir = path + "/results"
matches_fp = (
    results_dir
    + f"/segment_matches_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.pickle"
)

with open(matches_fp, "wb") as f:
    pickle.dump(segment_matches, f)

print("Segments matching completed, results saved.")
