# Buses Mapping

In [23]:
import pandas as pd

# Load the raw combined bus table; `bus385_id` is forced to string dtype.
df_combined_raw = pd.read_csv(
    'buses_combined_raw.csv',
    dtype={'bus385_id': str},
)

# Number of rows that have a non-null `x_385` value.
has_x_385 = df_combined_raw['x_385'].notna()
has_x_385.sum()

385

In [24]:
# Column groups describing the two bus sets stored side by side in the raw table.
cols_501 = ['bus501_id', 'x_501', 'y_501']
cols_385 = ['bus385_id', 'x_385', 'y_385']

# 501 set: every row of the raw table is kept.
df_501 = df_combined_raw[cols_501]

# 385 set: rows with a missing id or coordinate are dropped.
df_385 = df_combined_raw[cols_385].dropna()

df_501

Unnamed: 0,bus501_id,x_501,y_501
0,way/975637991,-1.271945,60.293401
1,relation/8087295-275,-3.753620,58.575786
2,way/495624575-275,-3.512715,58.564233
3,way/567187951-275,-3.879158,58.512154
4,way/1262971878,-3.452185,58.479968
...,...,...,...
496,way/49922626-400,-2.488599,50.623881
497,way/26938722-400,-3.730387,50.471640
498,way/34656007-400,-4.242535,50.444878
499,relation/9572254-400,-4.899645,50.395013


In [32]:
import numpy as np
from scipy.spatial import cKDTree

# Assign every 385 bus to its nearest 501 bus and attach the matches to the
# 501 table.
#
# Produces:
#   matched_df      - one row per 385 bus with the index label of its nearest 501 bus
#   grouped         - per matched 501 bus, lists of all 385 ids / coordinates assigned to it
#   df_501_updated  - the 501 table plus one representative 385 match
#                     (bus385_id, x_385, y_385) and the full assigned_* lists
#
# NOTE(review): x/y look like longitude/latitude in degrees. The KD-tree ranks
# neighbours by plain Euclidean distance on those raw values, which is not true
# ground distance - confirm that this approximation is acceptable.
base_cols_501 = ["bus501_id", "x_501", "y_501"]
match_cols_385 = ["bus385_id", "x_385", "y_385"]

tree = cKDTree(df_501[["x_501", "y_501"]].to_numpy())

# idx[i] is the row *position* in df_501 of the point nearest to row i of df_385.
dist, idx = tree.query(df_385[["x_385", "y_385"]].to_numpy(), k=1)

matched_df = pd.DataFrame({
    # Translate positions into index labels: groupby/join/update below all
    # align on labels, so this stays correct even if df_501 does not carry a
    # default 0..n-1 index.
    "df_501_index": df_501.index[idx].to_numpy(),
    "bus385_id": df_385["bus385_id"].values,
    "x_385": df_385["x_385"].values,
    "y_385": df_385["y_385"].values
})

# Every 385 bus must have received exactly one nearest 501 bus.
assert len(matched_df) == len(df_385)

# All 385 buses that share the same nearest 501 bus, collected into lists.
grouped = matched_df.groupby("df_501_index").agg({
    "bus385_id": list,
    "x_385": list,
    "y_385": list
}).rename(columns={
    "bus385_id": "assigned_bus385_ids",
    "x_385": "assigned_x_385",
    "y_385": "assigned_y_385"
})

# One representative match per 501 bus. When several 385 buses map to the same
# 501 bus, the last one in df_385 order is kept (not necessarily the closest).
primary_match = (
    matched_df
    .drop_duplicates("df_501_index", keep="last")
    .set_index("df_501_index")[match_cols_385]
)

# Build the result from the three 501 columns only, so it does not depend on
# extra columns that an earlier kernel session may have added to df_501.
df_501_updated = df_501[base_cols_501].join(primary_match)

# Unmatched 501 buses keep None in the list columns.
for list_col in grouped.columns:
    df_501_updated[list_col] = None
df_501_updated.update(grouped)

# Each matched 501 bus has exactly one representative 385 coordinate.
assert df_501_updated["x_385"].notna().sum() == len(grouped)

# First message: total number of df_385 rows successfully assigned.
print("成功分配 df_385 总数：", matched_df.shape[0])
# Second message: total number of df_501 rows that received an assignment.
print("被分配的 df_501 总数：", grouped.shape[0])

df_501_updated

成功分配 df_385 总数： 385
被分配的 df_501 总数： 254


Unnamed: 0,bus501_id,x_501,y_501,bus385_id,x_385,y_385,assigned_bus385_ids,assigned_x_385,assigned_y_385
0,way/975637991,-1.271945,60.293401,,,,,,
1,relation/8087295-275,-3.753620,58.575786,6441,-3.812256,58.546761,[6441],[-3.812256],[58.546761]
2,way/495624575-275,-3.512715,58.564233,6442,-3.453827,58.566820,[6442],[-3.453827],[58.56682]
3,way/567187951-275,-3.879158,58.512154,,,,,,
4,way/1262971878,-3.452185,58.479968,6443,-3.453827,58.490823,[6443],[-3.453827],[58.490823]
...,...,...,...,...,...,...,...,...,...
496,way/49922626-400,-2.488599,50.623881,5274,-2.528229,50.628558,[5274],[-2.528229],[50.628558]
497,way/26938722-400,-3.730387,50.471640,5210,-3.691406,50.477609,"[8016, 5210]","[-3.735352, -3.691406]","[50.498579, 50.477609]"
498,way/34656007-400,-4.242535,50.444878,5208,-4.244843,50.460127,[5208],[-4.244843],[50.460127]
499,relation/9572254-400,-4.899645,50.395013,5207,-4.925995,50.368241,[5207],[-4.925995],[50.368241]


In [33]:
# Alias, not a copy: `df_combined` and `df_501_updated` refer to the same
# DataFrame object, so an in-place change to one is visible through the other.
df_combined = df_501_updated
# Display the combined table as the cell output.
df_combined

Unnamed: 0,bus501_id,x_501,y_501,bus385_id,x_385,y_385,assigned_bus385_ids,assigned_x_385,assigned_y_385
0,way/975637991,-1.271945,60.293401,,,,,,
1,relation/8087295-275,-3.753620,58.575786,6441,-3.812256,58.546761,[6441],[-3.812256],[58.546761]
2,way/495624575-275,-3.512715,58.564233,6442,-3.453827,58.566820,[6442],[-3.453827],[58.56682]
3,way/567187951-275,-3.879158,58.512154,,,,,,
4,way/1262971878,-3.452185,58.479968,6443,-3.453827,58.490823,[6443],[-3.453827],[58.490823]
...,...,...,...,...,...,...,...,...,...
496,way/49922626-400,-2.488599,50.623881,5274,-2.528229,50.628558,[5274],[-2.528229],[50.628558]
497,way/26938722-400,-3.730387,50.471640,5210,-3.691406,50.477609,"[8016, 5210]","[-3.735352, -3.691406]","[50.498579, 50.477609]"
498,way/34656007-400,-4.242535,50.444878,5208,-4.244843,50.460127,[5208],[-4.244843],[50.460127]
499,relation/9572254-400,-4.899645,50.395013,5207,-4.925995,50.368241,[5207],[-4.925995],[50.368241]


In [34]:
# Write the combined table to disk without the index column.
# NOTE(review): the assigned_* columns hold Python lists, which CSV stores as
# their string form (e.g. "[8016, 5210]"); readers must parse them back.
df_combined.to_csv('buses_combined.csv', index=False)

# Sanity check: number of 501 buses that received at least one 385 bus.
# Counted on `assigned_bus385_ids` because the matching step always creates
# that column explicitly, so this works on a fresh kernel.
df_combined['assigned_bus385_ids'].notna().sum()

254