# Revision of manually simplified vs. original data

As a preprocessing step for evaluation, highlight the grid cells where the manually simplified data contains *more* (edges, nodes, or total edge length) than the original data. This is overwhelmingly likely an artifact of the service road mess (SORRY).

Three indicators that the manual data needs updating (each delta is computed per grid cell as original minus manual, so a negative delta means the manual data contains more):
* there are more edges in the manual than in the original data (`edge_count_delta < 0`)
* the summed length of all edges is *significantly* longer in the manual than in the original data (`edge_length_delta < -0.001`; the 0.001 m tolerance absorbs rounding/precision errors)
* there are more nodes in the manual than in the original data (`node_count_delta < 0`)

In [11]:
import os
import pathlib

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import momepy as mm
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
from scipy.stats import gaussian_kde

from core import stats, utils, viz

In [2]:
# --- evaluation settings ---------------------------------------------------

# Per-cell metrics that get compared between methods.
# Currently disabled candidates: "avg_degree", "stroke_count",
# "stroke_length_sum", "stroke_length_max".
eval_vars = ["edge_count", "edge_length", "node_count"]

# Methods whose results are read in and summarised on the grid.
methods_to_evaluate = ["manual", "original"]

# Pairs of methods to compare against each other.
methodpairs_to_compare = [("original", "manual")]

# Edge length deltas are only flagged when they fall below this value,
# so that mere precision errors are ruled out.
edge_length_threshold = -0.001

# h3 resolution of the evaluation grid
res = 9

In [3]:
# Create the output folder and, inside it, one subfolder per entry of
# utils.fua_city (named after that entry).
output_folder = "../../revision/"
os.makedirs(output_folder, exist_ok=True)

subfolders = []
for fua_id in utils.fua_city:
    sub = output_folder + str(fua_id)
    os.makedirs(sub, exist_ok=True)  # exist_ok: re-running the cell is harmless
    subfolders.append(sub)

In [4]:
# For every city: summarise the "manual" and "original" networks on a common
# grid, compute per-cell deltas (original - manual), flag the cells where the
# manual data exceeds the original, and save the flagged grid to disk.

# the sample metadata does not depend on the city, so read it once up front
meta = utils.read_sample_data()

# delta columns to check, each with the value below which a cell is flagged;
# deltas are original - manual, so "below threshold" means "more in manual"
to_verify = {
    "edge_count_delta": 0,
    "edge_length_delta": edge_length_threshold,
    "node_count_delta": 0,
}
verify_cols = [f"{variable}_to_verify" for variable in to_verify]

# for each city,
for fua in utils.fua_city:
    # look up the city name for log messages
    city = meta.loc[meta.eFUA_ID == fua, "eFUA_name"].values[0]

    print(f"Running for {city}")
    gdf_orig = utils.read_original(fua)
    proj_crs = gdf_orig.crs  # all layers of this city are read in this CRS

    ### MAKE GRID
    base_grid = utils.make_grid(fua, res, proj_crs)

    # read results from manual and orig methods into dict
    methods = {}

    for method in methods_to_evaluate:
        print(f"\t Reading in results for {method}")
        gdf = utils.read_results(fua, method, proj_crs)
        # drop rows whose normalized geometry duplicates an earlier row
        gdf = gdf[~gdf.normalize().duplicated()].copy().reset_index(drop=True)
        G = mm.gdf_to_nx(gdf, length="length", integer_labels=True)
        nodes, edges = mm.nx_to_gdf(G)

        # add node degrees
        nodes = stats.add_node_degree(nodes, G)

        ### grid with stats eval for this method only
        grid = base_grid.copy()

        # add per-cell edge and node statistics to the grid; the lambdas are
        # consumed immediately by apply(), so capturing the loop variables
        # `edges` / `nodes` is safe here (hence the B023 suppressions)
        grid[["edge_count", "edge_length"]] = grid.apply(
            lambda x: stats.get_edge_stats(edges, x.geometry),  # noqa: B023
            axis=1,
            result_type="expand",
        )
        grid[["node_count", "node_degrees", "avg_degree"]] = grid.apply(
            lambda x: stats.get_node_stats(nodes, x.geometry),  # noqa: B023
            axis=1,
            result_type="expand",
        )

        # keep everything for this method, including its grid
        methods[method] = {
            "gdf": gdf,
            "graph": G,
            "nodes": nodes,
            "edges": edges,
            "grid": grid,
        }

    # create DELTAS gdf which will contain the "to verify" evaluation
    deltas = base_grid.copy()
    for var in eval_vars:
        # compute difference (original - manual)
        deltas[f"{var}_delta"] = (
            methods["original"]["grid"][var] - methods["manual"]["grid"][var]
        )

    # infinite values carry no information for the checks below
    deltas[deltas == np.inf] = np.nan

    # check for each variable whether it is below its expected value
    for variable, threshold in to_verify.items():
        deltas[variable + "_to_verify"] = deltas[variable] < threshold

    # add total count of variables indicating need to verify
    deltas["to_verify_total"] = deltas[verify_cols].sum(axis=1)

    # save deltas gdf to this city's subfolder of the output folder
    deltas.to_file(f"{output_folder}{fua}/deltas.gpkg", index=False)

    # print info
    cells_total = len(deltas)
    cells_issues = int((deltas["to_verify_total"] > 0).sum())
    print(f"\t {city}: {cells_issues} of {cells_total} cells have issues. deltas gdf saved.")



Running for Aleppo
	 Reading in results for manual
	 Reading in results for original
	 Aleppo: 77 of 5020 cells have issues. deltas gdf saved.
Running for Auckland
	 Reading in results for manual
	 Reading in results for original
	 Auckland: 97 of 1533 cells have issues. deltas gdf saved.
Running for Bucaramanga
	 Reading in results for manual
	 Reading in results for original
	 Bucaramanga: 64 of 2250 cells have issues. deltas gdf saved.
Running for Douala
	 Reading in results for manual
	 Reading in results for original
	 Douala: 25 of 2630 cells have issues. deltas gdf saved.
Running for Liège
	 Reading in results for manual
	 Reading in results for original
	 Liège: 132 of 4199 cells have issues. deltas gdf saved.
Running for Salt Lake City
	 Reading in results for manual
	 Reading in results for original
	 Salt Lake City: 69 of 1850 cells have issues. deltas gdf saved.
Running for Wuhan
	 Reading in results for manual
	 Reading in results for original
	 Wuhan: 474 of 9934 cells ha

***

**Visualizing results of revision (for Auckland)**

In [15]:
# Load the deltas grid that was saved for Auckland and preview it.
fua = utils.city_fua["Auckland"]
deltas_path = f"../../revision/{fua}/deltas.gpkg"
deltas = gpd.read_file(deltas_path)

# first three rows only
deltas.head(3)

Unnamed: 0,hex_id,edge_count_delta,edge_length_delta,node_count_delta,edge_count_delta_to_verify,edge_length_delta_to_verify,node_count_delta_to_verify,to_verify_total,geometry
0,89bb5000083ffff,18.0,4.2e-05,18,False,False,False,0,"POLYGON ((302978.815 -4080433.664, 302886.222 ..."
1,89bb5000087ffff,73.0,512.502919,68,False,False,False,0,"POLYGON ((302683.497 -4080235.65, 302590.902 -..."
2,89bb500008bffff,18.0,-1.2e-05,18,False,False,False,0,"POLYGON ((303291.661 -4080266.265, 303199.071 ..."


In [17]:
# Optional overlay of the street networks themselves (currently disabled).
# NOTE(review): `methods` is filled inside the per-city loop, so it presumably
# holds whichever city was processed last rather than Auckland - confirm
# before re-enabling. If re-enabled, the cells layer below also needs `m=m`,
# otherwise it replaces the map created here instead of adding to it.
#
# # plot manual data
# method = "manual"
# m = methods[method]["gdf"].explore(
#     tiles="cartodb.positron",
#     name=method,
#     color="red",
#     style_kwds={"weight":2}
# )

# # plot original data
# method = "original"
# methods[method]["gdf"].explore(
#     m=m,
#     name=method,
#     color="black",
#     style_kwds={"weight":1}
# )

# interactive map of the grid cells, coloured by `to_verify_total`
m = deltas.explore(
    column="to_verify_total",
    cmap="Accent",
    name="cells",
    tiles="cartodb.positron",
)

# add a control for toggling the map layers
folium.LayerControl().add_to(m)

# show the map as the cell output
m