In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from sklearn.neighbors import KDTree
from tqdm import tqdm
import json

# === 1. Load LSOA centroids (EPSG:27700) ===
lsoa_df = pd.read_csv("LLSOA_Dec_2021_PWC_for_England_and_Wales_2022_1028145039677403461.csv")
lsoa_df["geometry"] = lsoa_df.apply(lambda row: Point(row["x"], row["y"]), axis=1)
lsoa_gdf = gpd.GeoDataFrame(lsoa_df, geometry="geometry", crs="EPSG:27700")

# === 2. Load road nodes (lat/lon) and convert to EPSG:27700 ===
nodes = gpd.read_file("uk_driving_nodes_simplified.geojson")
nodes = nodes.to_crs(epsg=27700)

# === 3. Build KDTree on node coordinates ===
# Drop invalid geometries
valid_nodes = nodes[nodes.geometry.is_valid & nodes.geometry.notnull()]
valid_nodes = valid_nodes[valid_nodes.geometry.x.notnull() & valid_nodes.geometry.y.notnull()]

node_coords = list(zip(valid_nodes.geometry.x, valid_nodes.geometry.y))

tree = KDTree(node_coords)

# === 4. Query each LSOA individually and map node ID to LSOA code ===
node_to_lsoas = {}
for i in tqdm(range(len(lsoa_gdf)), desc="Querying each LSOA"):
    x, y = lsoa_gdf.geometry.iloc[i].x, lsoa_gdf.geometry.iloc[i].y
    lsoa_code = lsoa_gdf.iloc[i]["LSOA21CD"]
    
    dist, idx = tree.query([[x, y]], k=1)
    node_id = int(nodes.iloc[idx[0][0]]["osmid"])
    
    node_to_lsoas.setdefault(str(node_id), []).append(lsoa_code)

# === 5. Save to JSON ===
with open("node_to_lsoa_mapping.json", "w") as f:
    json.dump(node_to_lsoas, f, indent=2)

print(f"✅ Saved {len(node_to_lsoas)} node-to-LSOA mappings to node_to_lsoa_mapping.json")

Querying each LSOA: 100%|██████████| 35672/35672 [00:06<00:00, 5876.09it/s]

✅ Saved 35671 node-to-LSOA mappings to node_to_lsoa_mapping.json



