# Draft of feature matching

Adapted from: https://github.com/anerv/BikeDNA_BIG/blob/main/feature_matching_hpc/scripts/feature_matching.py

In [61]:
# Load libraries, settings and data

import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
import pickle

import momepy as mm
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm, colors
import contextily as cx

import utils
import matching_functions as match_func

user settings for feature matching algorithm

In [2]:
# Define feature matching user settings
# NOTE(review): distances are presumably in units of the projected CRS and the
# angular threshold presumably in degrees -- confirm against matching_functions.
segment_length = 10  # The shorter the segments, the longer the matching process will take. For cities with a gridded street network with streets as straight lines, longer segments will usually work fine
buffer_dist = 15
hausdorff_threshold = 17
angular_threshold = 30

# Validate that every setting is numeric before any expensive step runs.
for s in [segment_length, buffer_dist, hausdorff_threshold, angular_threshold]:
    # The assert message has to be a string. Passing `print(...)` evaluates to
    # None, so the AssertionError itself would carry no message.
    assert isinstance(s, (int, float)), (
        "All parameters must be integer or float values!"
    )

user settings for use case (FUA)

In [3]:
# Use-case settings: the FUA to process, the grid resolution, and file locations.

# ID of the FUA to process
fua = 869

# resolution of the h3 grid
h3_resolution = 9

base_crs = "EPSG:4326"

# output folder for all results of this run
res_folder = f"../data/{fua}/evaluation/"

# input: original base data
orig_file = f"../data/{fua}/roads_osm.parquet"

# input: file taken as the baseline
base_file = f"../data/{fua}/manual/{fua}.parquet"

read in data

In [4]:
# Pick the selected FUA's row(s) from the sample metadata and pull out
# its geometry and its name.
meta = utils.read_sample_data()
is_selected_fua = meta.eFUA_ID == fua
geom = meta.loc[is_selected_fua, "geometry"]
city = meta.loc[is_selected_fua, "eFUA_name"].values[0]

In [5]:
# original base data, reduced to its geometry column
orig = utils.read_parquet_roads(fua)[["geometry"]]

# the CRS of the original data is handed to both readers below
proj_crs = orig.crs

# baseline data set (manual simplification)
base = utils.read_manual(fua, proj_crs=proj_crs)

# data set to compare against the baseline
comp = utils.read_parenx(fua, "voronoi", proj_crs=proj_crs)

  gdf = gdf.explode(ingore_index=True)


In [6]:
# For each data set: build a graph, convert it back to node/edge
# GeoDataFrames, then add the node degree to the nodes frame.

# baseline
base_graph = mm.gdf_to_nx(base, length="length", integer_labels=True)
nodes_base, edges_base = mm.nx_to_gdf(base_graph)
nodes_base = utils.add_node_degree(nodes_base, base_graph)

# comparison
comp_graph = mm.gdf_to_nx(comp, length="length", integer_labels=True)
nodes_comp, edges_comp = mm.nx_to_gdf(comp_graph)
nodes_comp = utils.add_node_degree(nodes_comp, comp_graph)

make h3 grid

In [8]:
# Build the grid for this FUA, make sure the results folder exists,
# write the grid there, and preview the first rows.
grid = utils.make_grid(fua, h3_resolution, proj_crs=proj_crs)
os.makedirs(res_folder, exist_ok=True)
grid_fp = res_folder + "grid.gpkg"
grid.to_file(grid_fp, index=False)
grid.head()

Unnamed: 0,geometry,hex_id
0,"POLYGON ((306416.804 -4085001.467, 306324.240 ...",89bb50030a3ffff
1,"POLYGON ((306001.641 -4080952.046, 305909.076 ...",89bb5000657ffff
2,"POLYGON ((300335.407 -4084696.785, 300242.787 ...",89bb500202fffff
3,"POLYGON ((311501.725 -4093311.127, 311409.201 ...",89bb501acc7ffff
4,"POLYGON ((295294.905 -4081695.931, 295202.240 ...",89bb5002d57ffff


In [None]:
# Name mapping: adapted script --> this notebook
# grid.grid_id --> grid.hex_id
# ref_edges_simplified --> edges_comp
# osm_edges_simplified --> edges_base
# study_crs --> proj_crs
# path --> res_folder

create segments

In [9]:
# Disabled step, kept for reference:
# base_segments.rename(columns={"baseid": "org_baseid"}, inplace=True)
# base_segments["baseid"] = base_segments[
#     "edge_id"
# ]  # Because matching function assumes an id column names baseid as unique id for edges

# Split the baseline edges into segments, assign the projected CRS and
# drop rows without a geometry.
base_segments = (
    match_func.create_segment_gdf(edges_base, segment_length=segment_length)
    .set_crs(proj_crs)
    .dropna(subset=["geometry"])
)

# Same for the comparison edges; their segment id column is renamed so it
# is distinct from the baseline's "seg_id".
comp_segments = (
    match_func.create_segment_gdf(edges_comp, segment_length=segment_length)
    .set_crs(proj_crs)
    .dropna(subset=["geometry"])
    .rename(columns={"seg_id": "seg_id_comp"})
)

# Store both segment sets; the file name records the segment length used.
base_segments.to_parquet(res_folder + f"base_segments_{segment_length}.parquet")
comp_segments.to_parquet(res_folder + f"comp_segments_{segment_length}.parquet")

print('Segments created and saved!')


Segments created and saved!


buffers

In [11]:
# Buffer-overlay step: the comparison segments are passed as the "reference"
# data and the baseline segments as the "osm" data, each with its id column.
buffer_matches = match_func.overlay_buffer(
    osm_data=base_segments,
    osm_id_col="seg_id",
    reference_data=comp_segments,
    ref_id_col="seg_id_comp",
    dist=buffer_dist,
)

print('Buffer matches found!')

# Store the result; the file name records the three matching parameters.
buffer_matches_fp = (
    res_folder + f"buffer_matches_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)
buffer_matches.to_parquet(buffer_matches_fp)


Buffer matches found!


final matches

In [12]:
# Derive the final segment matches from the buffer matches, using the
# angular and Hausdorff thresholds from the settings cell.
segment_matches = match_func.find_matches_from_buffer(
    buffer_matches=buffer_matches,
    reference_data=comp_segments,
    osm_edges=base_segments,
    hausdorff_threshold=hausdorff_threshold,
    angular_threshold=angular_threshold,
)

print("Segment matches found!")

# Pickle the matches; the file name records the three matching parameters.
matches_fp = res_folder + f"segment_matches_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.pickle"

with open(matches_fp, "wb") as matches_file:
    pickle.dump(segment_matches, matches_file)

print("Segments matching completed, results saved.")


205042 reference segments were matched to OSM edges
8703 reference segments were not matched
Segment matches found!
Segments matching completed, results saved.


matched vs. unmatched segments

In [13]:
# Baseline segments: matched if their seg_id appears among the match ids.
base_is_matched = base_segments.seg_id.isin(segment_matches.matches_id)
base_matched_segments = base_segments.loc[base_is_matched]
base_unmatched_segments = base_segments.loc[~base_is_matched]

# Comparison segments: the match table itself is the matched set; every
# comparison segment whose id is absent from it is unmatched.
comp_is_matched = comp_segments.seg_id_comp.isin(segment_matches.seg_id_comp)
comp_matched_segments = segment_matches
comp_unmatched_segments = comp_segments.loc[~comp_is_matched]

# Write all four subsets to parquet; file names record the matching parameters.
base_matched_segments.to_parquet(
    res_folder + f"base_matched_segments_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)
base_unmatched_segments.to_parquet(
    res_folder + f"base_unmatched_segments_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)
comp_matched_segments.to_parquet(
    res_folder + f"comp_matched_segments_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)
comp_unmatched_segments.to_parquet(
    res_folder + f"comp_unmatched_segments_{buffer_dist}_{hausdorff_threshold}_{angular_threshold}.parquet"
)


See the `featurematching_plot.ipynb` notebook for first visualizations of the feature matching results.