# Draft of H3 evaluation
grid-cell wise comparison of metrics

In [None]:
# Load the `watermark` IPython extension and emit its default report,
# so the environment this notebook ran in is recorded next to the results.
%load_ext watermark
%watermark

In [None]:
import os
import pathlib

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import momepy as mm
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde

from core import eval, stats, utils, viz

%watermark -w
%watermark -iv

In [None]:
# Per-cell metrics that are compared between methods
eval_vars = [
    # edge metrics
    "edge_count",
    "edge_length",
    # node metrics
    "node_count",
    "avg_degree",
    # stroke metrics
    "stroke_count",
    "stroke_length_sum",
    "stroke_length_max",
]

# Methods whose outputs are read in and evaluated
# (uncomment the parenx entries to include them)
methods_to_evaluate = [
    "cityseer",
    "manual",
    "orig",
    "osmnx",
    # "parenx-voronoi",
    # "parenx-skeletonize",
    "sgeop",
]

# Method pairs to compare: every evaluated method against "manual"
methodpairs_to_compare = [
    (method, "manual") for method in methods_to_evaluate if method != "manual"
]

make directories for evaluation results

In [None]:
# Root folder for evaluation results plus one subfolder per FUA listed in
# utils.fua_city; existing folders are left untouched.
evalfolder = "../../evaluation/"
subfolders = [f"{evalfolder}{fua_id}" for fua_id in utils.fua_city]
for folder in [evalfolder, *subfolders]:
    os.makedirs(folder, exist_ok=True)

Choose use case (FUA)

In [None]:
# Use case: FUA identifier and H3 resolution of the evaluation grid
fua = 869
res = 9

# Look up geometry and name of the chosen FUA in the sample metadata
meta = utils.read_sample_data()
is_selected_fua = meta["eFUA_ID"] == fua
geom = meta.loc[is_selected_fua, "geometry"]
city = meta.loc[is_selected_fua, "eFUA_name"].values[0]

# The CRS of the original network is used as the projected CRS downstream
gdf_orig = utils.read_original(fua)
proj_crs = gdf_orig.crs

Make grid

In [None]:
# Build the evaluation grid for the chosen FUA at H3 resolution `res`, passing
# the CRS of the original network. Later cells work on copies of this grid.
base_grid = utils.make_grid(fua, res, proj_crs)

Read in results from different methods

In [None]:
# Collect, for every method, the cleaned geometries, the derived graph with
# its node/edge tables, and a grid holding the per-cell statistics.

methods = {}

for method in methods_to_evaluate:
    print(f"Reading in results for {method}")

    # load the method output and drop rows whose normalized geometry
    # duplicates an earlier row
    raw = eval.read_method_outputs(fua, method, proj_crs)
    is_duplicate = raw.normalize().duplicated()
    gdf = raw[~is_duplicate].copy().reset_index(drop=True)

    # build the graph and convert it back into node and edge tables
    G = mm.gdf_to_nx(gdf, length="length", integer_labels=True)
    nodes, edges = mm.nx_to_gdf(G)

    # attach node degrees
    nodes = stats.add_node_degree(nodes, G)

    # derive strokes with COINS and tag every edge with its stroke ID
    coins = mm.COINS(edges, angle_threshold=120, flow_mode=True)
    edges["stroke_id"] = coins.stroke_attribute()
    stroke_gdf = coins.stroke_gdf()

    methods[method] = {
        "gdf": gdf,
        "graph": G,
        "nodes": nodes,
        "edges": edges,
    }

    ### per-cell statistics for this method only, on a fresh copy of the grid
    grid = base_grid.copy()

    edge_cols = ["edge_count", "edge_length"]
    grid[edge_cols] = grid.apply(
        lambda cell: stats.get_edge_stats(edges, cell.geometry),  # noqa: B023
        axis=1,
        result_type="expand",
    )

    node_cols = ["node_count", "node_degrees", "avg_degree"]
    grid[node_cols] = grid.apply(
        lambda cell: stats.get_node_stats(nodes, cell.geometry),  # noqa: B023
        axis=1,
        result_type="expand",
    )

    stroke_cols = ["stroke_count", "stroke_length_sum", "stroke_length_max"]
    grid[stroke_cols] = grid.apply(
        lambda cell: stats.get_stroke_stats(  # noqa: B023
            edges, stroke_gdf, cell.geometry
        ),
        axis=1,
        result_type="expand",
    )

    # keep the grid alongside the other results of this method
    methods[method]["grid"] = grid

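Derive per-cell weights from the ranking of the absolute orig–manual differences: cells where `orig` and `manual` differ most get the highest weight (10), cells where they differ least get the lowest (1).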

In [None]:
## Get "deltas": grid gdf of absolute differences orig-manual for weighting
#
# For every evaluation variable the cells are ranked by their absolute
# orig-manual difference. The per-variable ranks are summed, the sums are binned
# into deciles, and the decile is turned into a weight from 10 (largest
# differences) down to 1 (smallest differences).

assert "manual" in methods, "Need to read in manual data first"
assert "orig" in methods, "Need to read in orig data first"

deltas = base_grid.copy()
quantiles = np.arange(0, 1.1, 0.1)  # 10% steps

for var in eval_vars:
    delta_col = f"{var}_delta"
    deltas[delta_col] = abs(
        methods["orig"]["grid"][var] - methods["manual"]["grid"][var]
    )
    # Treat infinite differences as missing. np.inf / np.nan are used because
    # the np.Inf / np.NaN aliases were removed in NumPy 2.0, and only the new
    # column is touched so the geometry column is never compared to a float.
    deltas[delta_col] = deltas[delta_col].replace(np.inf, np.nan)

    # rank 0 == largest delta; rank N == smallest delta (missing deltas sort
    # last). argsort of the sorted index labels gives each cell's position in
    # that order -- assumes the grid has a default 0..N-1 index (TODO confirm).
    sorted_labels = list(deltas.sort_values(by=delta_col, ascending=False).index)
    deltas[f"{var}_rank"] = np.argsort(sorted_labels)


colnames = [f"{var}_rank" for var in eval_vars]
deltas["total_rank"] = deltas[colnames].sum(axis=1)

deltas["rank_quantile"] = pd.qcut(
    deltas["total_rank"],
    q=quantiles,
    retbins=False,
    labels=False,  # we want integer labels of ranks
)

# decile 0 (lowest rank sum == largest differences) gets weight 10, decile 9 gets 1
deltas["rank_weight"] = 10 - deltas["rank_quantile"]

# delete helper columns
deltas = deltas.drop(columns=colnames + ["rank_quantile", "total_rank"])

deltas.head()

explore weighting, does it make sense?

In [None]:
# Interactive map: grid cells coloured by weight, with the original and the
# manually simplified networks drawn on top as toggleable layers.
m = deltas.explore(
    column="rank_weight",
    cmap="Reds",
    tiles="cartodb positron",
    name="cells",
)
for layer_name, layer_color in (("orig", "black"), ("manual", "red")):
    methods[layer_name]["gdf"].explore(m=m, name=layer_name, color=layer_color)
folium.LayerControl().add_to(m)
m

***

evaluate methods

In [None]:
## Get eval dict
#
# evaldict maps "<m1>_<m2>" to a copy of the base grid that carries, for every
# evaluation variable, the ratio m1 / m2 and the absolute difference |m1 - m2|.

evaldict = {}


for m1, m2 in methodpairs_to_compare:
    # make sure both methods are read into dict
    assert m1 in methods, f"Need to read in {m1} results first!"
    assert m2 in methods, f"Need to read in {m2} results first!"

    # get base grid
    grid = base_grid.copy()
    left, right = methods[m1]["grid"], methods[m2]["grid"]

    # add ratio and delta columns
    for var in eval_vars:
        # A zero denominator produces an infinite ratio; store it as NaN so it
        # does not distort later summaries, plots or density estimates.
        grid[f"{var}_ratio"] = (left[var] / right[var]).replace(np.inf, np.nan)
        grid[f"{var}_delta"] = abs(left[var] - right[var])

    # save grid as dict entry
    evaldict[f"{m1}_{m2}"] = grid

    del grid

### TODO

**pick up here**

* weight the values for the KDE by repeating each value `rank_weight` times (`np.repeat` or similar)
* make KDE plots for each evaluation variable, for ratio and abs values,
    * non-weighted
    * weighted (by repeating with rank)

In [None]:
# data = np.repeat(a=evaldict[methodpair][varcolumn], repeats=deltas.rank_weight)

# # KDE
# npoints = len(data) // 10
# kde = gaussian_kde(data, bw_method="silverman")
# mylinspace = np.linspace(data.min(), data.max(), npoints)
# pdf = kde.pdf(mylinspace)

# # HIST
# plt.hist(data, bins=50)

# # with seaborn: https://seaborn.pydata.org/generated/seaborn.displot.html
# # # sns.displot(kde=True) plots both KDE and hist

make plots

In [None]:
# var = eval_vars[6]
# fig, ax = plt.subplots(1,1, figsize = (10,10))

# for i, (m1, m2) in enumerate(methodpairs_to_compare[1:]):
#     data = np.array(
#             evaldict[f"{m1}_{m2}"][f"{var}_delta"]
#         )
#     ax.hist(
#         data,
#         bins=10,
#         # alpha=0.2,
#         label=m1,
#         # edgecolor = "black",
#         histtype="step"
#     )
# ax.set_yscale("log")
# plt.suptitle(f"Distribution of {var} abs deltas in method-manual")
# plt.legend()
# plt.show()

In [None]:
# # parameter for pdf fitting
# npoints = len(weights["grid"])//10
# print(f"npoints: {npoints}");
# # get data for current option, using log of data
# data = np.array(
#     weights["grid"]["stroke_length_sum_delta"]
# )

# # fit Gaussian KDE
# kde = gaussian_kde(data, bw_method="silverman")

# # define linear space
# mylinspace = np.linspace(data.min(), data.max(), npoints)

# # generate probability density function
# pdf = kde.pdf(mylinspace)

# fig, ax = plt.subplots(1,1)

# ax.plot(pdf)
# #ax.hist(data, bins=npoints)[0]


***

In [None]:
## Make evaldict of comparisons
#
# evaldict maps "<m1>_<m2>" to a copy of the base grid with one `<var>_ratio`
# column (m1 / m2) per evaluation variable.
# NOTE: this rebinds `evaldict`, so the `_delta` columns computed in the
# earlier evaldict cell are not available afterwards.

evaldict = {}

for m1, m2 in methodpairs_to_compare:
    # make sure both methods are read into dict
    assert m1 in methods, f"Need to read in {m1} results first!"
    assert m2 in methods, f"Need to read in {m2} results first!"

    # get base grid
    grid = base_grid.copy()
    left, right = methods[m1]["grid"], methods[m2]["grid"]

    # add ratio columns for all evaluation variables
    for var in eval_vars:
        # A zero denominator produces an infinite ratio; store it as NaN.
        # np.inf / np.nan are used because the np.Inf / np.NaN aliases were
        # removed in NumPy 2.0, and only the new column is touched so the
        # geometry column is never compared to a float.
        grid[f"{var}_ratio"] = (left[var] / right[var]).replace(np.inf, np.nan)

    # save grid as dict entry
    evaldict[f"{m1}_{m2}"] = grid

    del grid

***
***

# Usage of evaldict

`evaldict` stores the evaluation results.

key: string `"<left>_<right>"` naming the compared pair of methods, e.g. `"orig_manual"`;

value: grid with `_ratio` columns for the evaluation variables (left / right)

In [None]:
# Comparison to inspect. evaldict keys are built as f"{m1}_{m2}" from
# methodpairs_to_compare, so the pair is named first and the key derived from
# it. Splitting the key on "_" would break for method names that contain an
# underscore.
c1, c2 = "sgeop", "manual"
comp = f"{c1}_{c2}"
var = "stroke_length_max_ratio"

In [None]:
# Interactive map of the chosen comparison: grid cells coloured by `var`,
# with the networks of both compared methods as toggleable layers.
cells = evaldict[comp].copy()
m = cells.explore(column=var, cmap="Reds", tiles="cartodb positron", name=var)
for method_name, line_color in ((c1, "black"), (c2, "green")):
    methods[method_name]["gdf"].explore(m=m, name=method_name, color=line_color)
folium.LayerControl().add_to(m)
m

In [None]:
# Show the comparison grid stored for the ("orig", "manual") pair
evaldict["orig_manual"]

In [None]:
# The reversed pair only exists if ("manual", "orig") is listed in
# methodpairs_to_compare; with the pairs configured above it is absent, so
# list the available keys instead of raising a KeyError.
evaldict.get(
    "manual_orig",
    f"no 'manual_orig' entry in evaldict; available keys: {sorted(evaldict)}",
)

In [None]:
# Side-by-side maps of `var` for two comparisons, on a shared colour range.
# evaldict is keyed by comparison ("<method>_manual"), not by method name, so
# the panels are selected with their own comparison keys rather than with the
# method names held in c1 / c2.
comparisons_to_plot = ("orig_manual", "sgeop_manual")
missing = [key for key in comparisons_to_plot if key not in evaldict]
assert not missing, f"Not in evaldict: {missing}; available: {sorted(evaldict)}"

cmap = cm.PiYG

fig, ax = plt.subplots(1, 2, figsize=(20, 10))

for i, comparison in enumerate(comparisons_to_plot):
    evaldict[comparison].plot(
        ax=ax[i],
        column=var,
        # norm=colors.CenteredNorm(vcenter=1),
        # fixed range whose midpoint is 1, i.e. the diverging colormap is
        # centred on a ratio of 1
        vmin=-3,
        vmax=5,
        cmap=cmap,
        legend=True,
    )
    ax[i].set_title(comparison)

plt.suptitle(var);

In [None]:
# Side-by-side maps of the edge-length ratio for two comparisons.
# evaldict keys follow f"{m1}_{m2}" for the pairs in methodpairs_to_compare,
# i.e. "<method>_manual". The former "manual_<method>" keys and the disabled
# parenx methods are not present in evaldict.
c1 = "osmnx_manual"
c2 = "sgeop_manual"
var = "edge_length_ratio"

missing = [key for key in (c1, c2) if key not in evaldict]
assert not missing, f"Not in evaldict: {missing}; available: {sorted(evaldict)}"

cmap = cm.PiYG

fig, ax = plt.subplots(1, 2, figsize=(20, 10))

for i, comparison in enumerate((c1, c2)):
    evaldict[comparison].plot(
        ax=ax[i],
        column=var,
        # diverging colour scale centred on a ratio of 1; each panel gets its
        # own norm instance
        norm=colors.CenteredNorm(vcenter=1),
        cmap=cmap,
        legend=True,
    )
    ax[i].set_title(comparison)

plt.suptitle(var);