# Amenity hierarchy
**Description:**
This notebook reproduces Figure 5, "Amenity hierarchy of local deviation," for the study.

**Data Source:** 
- `data/dist_diff_summary.csv`: Amenity-level aggregation of distance differentials between recurrent and proximate visitations (Provided in repo).


In [None]:
from pathlib import Path
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from adjustText import adjust_text


# Resolve the project root. The notebook may be started either from the
# repository root or from its `notebooks/` sub-directory; only in the latter
# case do we step up one level. The directory *name* is compared (instead of
# searching the whole path string) so that an unrelated ancestor such as
# `/home/user/notebooks-archive/project` does not shift the root too far up.
project_root = Path.cwd()
if project_root.name == 'notebooks':
    project_root = project_root.parent

# Make the project's `src/` directory importable from this notebook.
src_path = str(project_root / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Directory that holds the input data files.
data_dir = project_root / 'data'

# Raise the column display limit so wide dataframes are not truncated.
pd.options.display.max_columns = 999

## Calculate distance differentials

In [None]:
## (Optional) Recompute the summary from the K-visitation data
# The commented-out lines below load the inputs and rebuild the summary
# dataframe; uncomment them to regenerate `plot_df` instead of loading the
# previously computed CSV.
# places_k = pd.read_parquet(data_dir / "places_k.parquet") # Raw visitation data
# poi_classes = pd.read_parquet(data_dir / "grid_poi.parquet")

# `build_summary_df` computes the summary dataframe with distance differentials.
# NOTE(review): the module is presumably resolved through the `src/` entry
# added to `sys.path` in the setup cell — confirm.
from distance_differentials import build_summary_df

# plot_df = build_summary_df(places_k, poi_classes)

## Figure 5

In [None]:
# Read the precomputed summary dataframe from the data directory
summary_csv = data_dir / "dist_diff_summary.csv"
plot_df = pd.read_csv(summary_csv)

In [None]:
# Figure 5: one marker per row of `plot_df`, placed at the mean nearest-option
# distance (x, log scale) and the distance differential (y), sized by
# `n_places` and coloured by `category`.
fig, ax = plt.subplots(figsize=(6, 6), dpi=300)

# Preferred display order of the categories; passed as `hue_order` below.
category_order = [
    "Transport",
    "Culture",
    "Retail",
    "Service",
    "Groceries",
    "Dining",
    "Civic & Religion",
    "Healthcare",
    "Education",
    "Fitness",
]
# Keep only the categories that occur in the data. The set is built once so
# it is not recomputed for every candidate category.
present_categories = set(plot_df["category"].dropna())
category_order = [cat for cat in category_order if cat in present_categories]

sns.scatterplot(
    ax=ax,
    data=plot_df,
    x="mean_closest_visit_dist",
    y="dist_diff",
    size="n_places",
    sizes=(50, 500),
    hue="category",
    hue_order=category_order,
    palette="tab20",
    alpha=0.9,
    edgecolor="white",
    linewidth=1.5,
    legend=False,  # the legend is drawn as a separate figure
)

ax.set_xscale("log")

# Dashed reference lines at the arithmetic mean of each plotted variable.
ax.axhline(plot_df["dist_diff"].mean(), color="gray", linestyle="--", linewidth=1, alpha=0.7)
ax.axvline(plot_df["mean_closest_visit_dist"].mean(), color="gray", linestyle="--", linewidth=1, alpha=0.7)

# NOTE(review): only the x axis is switched to a log scale, yet the y ticks
# are decade-spaced as well — confirm whether a log y scale was intended.
ax.set_yticks([10, 100, 1000], [10, 100, 1000], fontsize=8)
ax.set_xticks([50, 100, 1000, 10000], [50, 100, 1000, 10000], fontsize=8)
ax.set_xlabel("Mean nearest option distance (m)", fontsize=8)
ax.set_ylabel("Distance differential (m)", fontsize=8)

# Annotate every point that has complete coordinates and a class name.
label_df = plot_df.dropna(subset=["mean_closest_visit_dist", "dist_diff", "original_class"])
texts = [
    ax.text(x, y, label, fontsize=5, alpha=0.8)
    for x, y, label in zip(
        label_df["mean_closest_visit_dist"],
        label_df["dist_diff"],
        label_df["original_class"],
    )
]
# `adjust_text` is imported unconditionally at the top of the notebook, so
# only the "nothing to label" case needs guarding here.
if texts:
    adjust_text(texts, ax=ax)

plt.tight_layout()
plt.show()


In [None]:
# Stand-alone legend figure: category colours on top, marker-size key below.
fig_legend, axes = plt.subplots(2, 1, figsize=(2.5, 7), gridspec_kw={'height_ratios': [3, 1.2]}, dpi=300)

# Take colours from tab20 in category order; this is intended to mirror the
# `palette="tab20"` / `hue_order=category_order` mapping of the scatter plot.
tab20_colors = plt.cm.tab20.colors
category_colors = {cat: tab20_colors[i % 20] for i, cat in enumerate(category_order)}

# Category legend (top)
ax_cat = axes[0]
ax_cat.set_xlim(0, 1)
ax_cat.set_ylim(0, len(category_order) + 1)
ax_cat.axis('off')
ax_cat.set_title('Category', fontsize=11, fontweight='bold', loc='left', x=0.05)

for i, cat in enumerate(category_order):
    # Stack the entries downwards from the top of the axes.
    y_pos = len(category_order) - i * 0.8
    ax_cat.scatter([0.08], [y_pos], c=[category_colors[cat]], s=60, edgecolor='white', linewidth=0.5)
    ax_cat.text(0.18, y_pos, cat, va='center', ha='left', fontsize=9)

# Size legend (bottom) - Amenity count
ax_size = axes[1]
ax_size.set_xlim(0, 1)
ax_size.set_ylim(0, 5)
ax_size.axis('off')
ax_size.set_title('Amenity count', fontsize=11, fontweight='bold', loc='left', x=0.05)

# Define size legend values
size_values = [8000, 16000, 24000, 32000]

# seaborn maps `sizes=(50, 500)` linearly between the smallest and largest
# `n_places` in the data, so the key has to use that same range; assuming a
# fixed 0..32000 span would draw markers that do not match the plot.
size_lo, size_hi = 50, 500  # keep in sync with sizes=(50, 500) in the scatter plot
n_min = plot_df["n_places"].min()
n_max = plot_df["n_places"].max()
n_span = n_max - n_min
if n_span > 0:
    sizes_for_legend = [
        # Marker areas cannot be negative; floor values far below the data minimum at 0.
        max(size_lo + (val - n_min) / n_span * (size_hi - size_lo), 0.0)
        for val in size_values
    ]
else:
    # Degenerate (single-valued or all-missing) data: fall back to the smallest marker size.
    sizes_for_legend = [size_lo] * len(size_values)

for i, (val, size) in enumerate(zip(size_values, sizes_for_legend)):
    y_pos = 4 - i * 0.95
    ax_size.scatter([0.12], [y_pos], c='gray', s=size, edgecolor='white', linewidth=0.5, alpha=0.8)
    ax_size.text(0.35, y_pos, f'{val}', va='center', ha='left', fontsize=9)

# Transparent backgrounds so the legend can be placed over other artwork.
fig_legend.patch.set_alpha(0)
# A dedicated loop name keeps the main figure's `ax` from being overwritten.
for legend_ax in axes:
    legend_ax.set_facecolor('none')
plt.subplots_adjust(hspace=0.05)
plt.show()