# Statistical comparison of errors

In [None]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import momepy as mm
import numpy as np
import pandas as pd

from core import stats, utils

In [None]:
# Create the output tree for the evaluation plots: the two shared parent
# folders first, then one folder per FUA listed in `utils.fua_city`.
plot_dirs = ["../../plots/", "../../plots/evaluation/"]
plot_dirs.extend(f"../../plots/evaluation/{fua}" for fua in utils.fua_city)

for subfolder in plot_dirs:
    # exist_ok=True keeps the cell re-runnable on an already populated tree
    os.makedirs(subfolder, exist_ok=True)

In [2]:
# Variables to evaluate, grouped by the statistic family that produces them in
# the evaluation loop (edge stats, node stats, stroke stats). The resulting
# list and its order are what the comparison and plotting code iterate over.
eval_vars = (
    ["edge_count", "edge_length"]  # edges
    + ["node_count", "avg_degree"]  # nodes
    + ["stroke_count", "stroke_length_sum", "stroke_length_max"]  # strokes
)

Define which methods to evaluate

In [3]:
# which methods to evaluate
# Every entry is read via utils.read_results in the evaluation loop and gets
# its own graph, node/edge tables and per-cell statistics grid, in this order.
# "revised_manual" must stay in this list: the distance computation uses its
# grid as the reference the compared methods are measured against.
methods_to_evaluate = [
    "cityseer",
    "original",
    "osmnx",
    "parenx-voronoi",
    "parenx-skeletonize",
    "revised_manual",
    "neatnet",
]

In [4]:
# which methods to compare against the "revised_manual" grid
# The evaluation loop looks each of these up in the per-method results it
# builds from `methods_to_evaluate`, so every entry here must also appear
# there. The list order is the order of the entries in the distance plots.
methods_to_compare = [
    "cityseer",
    "osmnx",
    "parenx-voronoi",
    "parenx-skeletonize",
    "neatnet",
]

In [5]:
# which FUA?
# NOTE(review): the evaluation loop below iterates `for fua in utils.fua_city`,
# which rebinds `fua`; this value is therefore overwritten once that loop runs.
fua = 1656

# which h3 resolution?
# (passed to utils.make_grid when the evaluation grid is built)
res = 9

In [None]:
import warnings

# The sample metadata is the same for every FUA, so read it once instead of
# once per loop iteration.
meta = utils.read_sample_data()

for fua in utils.fua_city:
    print(f"Generating for {fua}")
    # read in base data
    # NOTE(review): `geom` and `city` are not used inside this cell; they are
    # kept in case later cells read them - confirm before removing.
    geom = meta.loc[meta.eFUA_ID == fua, "geometry"]
    city = meta.loc[meta.eFUA_ID == fua, "eFUA_name"].values[0]

    gdf_orig = utils.read_original(fua)
    proj_crs = gdf_orig.crs

    # Make grid
    base_grid = utils.make_grid(fua, res, proj_crs)

    # get info on cells with revised data
    # (named `revision_deltas` so it cannot be clobbered by the `deltas`
    # distance dict that is built further down)
    revision_deltas = gpd.read_file(f"../../revision/{fua}/deltas_updated.gpkg")

    # make sure the folder the per-method grids are written to exists
    os.makedirs(f"../../evaluation/{fua}", exist_ok=True)

    # read results from all methods into dict
    methods = {}

    for method in methods_to_evaluate:
        print(f"Reading in results for {method}")
        gdf = utils.read_results(fua, method, proj_crs)

        print("     getting graph")
        # drop geometries that are duplicates after normalization
        gdf = gdf[~gdf.normalize().duplicated()].copy().reset_index(drop=True)
        G = mm.gdf_to_nx(gdf, length="length", integer_labels=True)

        nodes, edges = mm.nx_to_gdf(G)

        # add node degrees
        print("     adding node degree")
        nodes = stats.add_node_degree(nodes, G)

        # add stroke IDs
        print("     measuring coins")
        with warnings.catch_warnings():
            # silence UserWarnings emitted while COINS is constructed
            warnings.filterwarnings("ignore", category=UserWarning)
            coins = mm.COINS(edges, angle_threshold=120, flow_mode=True)
        edges["stroke_id"] = coins.stroke_attribute()
        stroke_gdf = coins.stroke_gdf()

        methods[method] = {
            "gdf": gdf,
            "graph": G,
            "nodes": nodes,
            "edges": edges,
        }

        ### grid with stats eval for this method only
        grid = base_grid.copy()

        # The lambdas below close over loop variables but are called
        # immediately by `apply`, hence the B023 suppressions.
        print("     measuring grid edge")
        grid[["edge_count", "edge_length"]] = grid.apply(
            lambda x: stats.get_edge_stats(edges, x.geometry),  # noqa: B023
            axis=1,
            result_type="expand",
        )

        print("     measuring grid node")
        grid[["node_count", "node_degrees", "avg_degree"]] = grid.apply(
            lambda x: stats.get_node_stats(nodes, x.geometry),  # noqa: B023
            axis=1,
            result_type="expand",
        )

        print("     measuring stroke")
        grid[["stroke_count", "stroke_length_sum", "stroke_length_max"]] = grid.apply(
            lambda x: stats.get_stroke_stats(edges, stroke_gdf, x.geometry),  # noqa: B023
            axis=1,
            result_type="expand",
        )

        # add info on which cells have to be verified still
        # NOTE(review): this assignment aligns on the index, so it assumes the
        # revision file and the grid share the same index - confirm.
        grid["to_verify_total"] = revision_deltas["to_verify_total"]

        # save grid to dict
        methods[method]["grid"] = grid

        # save to a file, one layer per method (a fixed layer name would make
        # every method overwrite the previous one's grid)
        grid.to_file(f"../../evaluation/{fua}/grid.gpkg", layer=method)

    # The comparison needs the grids of *all* methods (in particular
    # "revised_manual"), so it runs once per FUA after the method loop.

    # get euclidean distance between the distributions
    reference_grid = methods["revised_manual"]["grid"]
    deltas = {}
    for eval_var in eval_vars:
        deltas[eval_var] = {}
        for method in methods_to_compare:
            delta_comp = (
                reference_grid[eval_var] - methods[method]["grid"][eval_var]
            ) ** 2

            delta = np.sqrt(delta_comp.sum())
            deltas[eval_var][method] = delta

    # plot the distance by eval variable: one horizontal bar chart per variable
    # (squeeze=False keeps `axs` an array even for a single variable)
    fig, axs = plt.subplots(len(eval_vars), figsize=(6, 18), squeeze=False)
    for i, eval_var in enumerate(eval_vars):
        ax = axs.flat[i]
        s = pd.Series(deltas[eval_var])
        s.plot.barh(ax=ax)
        # dashed line marks the smallest distance
        ax.axvline(s.min(), color="coral", linestyle="--")
        ax.set_xlabel(eval_var)

    # lay out and write the figure once, after all subplots are drawn
    fig.tight_layout()
    fig.savefig(
        f"../../plots/evaluation/{fua}/euclidean_distance.png",
        dpi=300,
        bbox_inches="tight",
    )

Reading in results for cityseer
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid node
     measuring stroke
Reading in results for original
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid node
     measuring stroke
Reading in results for osmnx
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid node
     measuring stroke
Reading in results for parenx-voronoi
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid node
     measuring stroke
Reading in results for parenx-skeletonize
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid node
     measuring stroke
Reading in results for revised_manual
     getting graph
     adding node degree
     measuring coins
     measuring grid edge
     measuring grid 