In [1]:
import os

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

In [2]:
# Read the real-estate listings and give every row a shapely Point built from
# its (longitude, latitude) pair.
df = pd.read_csv('../../../data/processed/mockup_dataset_find_nearby.csv')
df['geometry'] = pd.Series(
    [Point(lon, lat) for lon, lat in zip(df['longitude'], df['latitude'])],
    index=df.index,
    dtype=object,
)
# The coordinates are declared as geographic lon/lat (EPSG:4326).
gdf_real_estate = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')


In [3]:
# Load the roads shapefile.
# The path defaults to the original local location but can be overridden by
# setting the OSM_ROADS_SHP environment variable, so the notebook is not tied
# to one machine's drive layout.
ROADS_SHP = os.environ.get(
    "OSM_ROADS_SHP",
    r"D:\CADT\cambodia-latest-free.shp\gis_osm_roads_free_1.shp",
)
roads = gpd.read_file(ROADS_SHP)


In [4]:
# Reproject the listings and the roads to EPSG:32648 (UTM zone 48N, a metric
# CRS covering Cambodia) so that distances between them are meaningful.
gdf_real_estate, roads = (
    layer.to_crs(epsg=32648) for layer in (gdf_real_estate, roads)
)


In [5]:
# Road classes to flag. Each entry becomes one `f_<class>` indicator, and the
# order here is the order in which those indicators are produced.
road_types = (
    "bridleway corridor cycleway disused footway motorway path "
    "pedestrian primary residential road secondary service steps "
    "tertiary track trunk trunk_link unclassified unused"
).split()

In [6]:
def find_nearby_road_types(row, roads_gdf, distance=10000, road_classes=None):
    """Flag which road classes occur within ``distance`` of one location.

    Parameters
    ----------
    row : object with a ``geometry`` attribute
        The record whose surroundings are inspected (e.g. one row handed over
        by ``GeoDataFrame.apply(..., axis=1)``).
    roads_gdf : GeoDataFrame-like
        Road layer. It must offer ``distance(geometry)``, boolean-mask
        indexing and an ``fclass`` column holding the road class of each
        feature.
    distance : float, default 10000
        Search radius, expressed in the units of the layers' CRS. With a
        metric CRS the default is 10 000 m (10 km), not 100 m; pass a
        smaller value for a tighter neighbourhood.
    road_classes : iterable of str, optional
        Classes to report. When omitted, the module-level ``road_types``
        list is used.

    Returns
    -------
    pandas.Series
        One ``f_<class>`` entry per requested class, set to 1 when at least
        one road of that class lies within ``distance`` and 0 otherwise.
    """
    wanted = road_types if road_classes is None else road_classes

    # NOTE: this measures the distance to every road for every call; for big
    # layers a spatial-index prefilter or a spatial join would be much faster.
    within_reach = roads_gdf.distance(row.geometry) <= distance
    nearby = roads_gdf[within_reach]

    # Collect the distinct classes once so each membership test is O(1)
    # instead of rescanning the whole column for every road class.
    present = set(nearby['fclass'])

    return pd.Series({f'f_{road_type}': int(road_type in present) for road_type in wanted})


In [7]:

# Evaluate every real-estate point against the road layer; the result holds
# one row of f_<road_type> flags per point.
flags_df = gdf_real_estate.apply(
    lambda listing: find_nearby_road_types(listing, roads_gdf=roads, distance=10000),
    axis=1,
)

In [8]:
# Place the original columns and the road flags side by side. Both frames get
# a fresh 0..n-1 index first so the rows line up by position.
aligned_parts = [part.reset_index(drop=True) for part in (df, flags_df)]
result = pd.concat(aligned_parts, axis='columns')

# Preview the first rows of the combined table
print(result.head())

   Unnamed: 0 address_subdivision address_locality    address_line_2  \
0           0          Phnom Penh      Chbar Ampov        Preaek Pra   
1           1          Phnom Penh      Chamkar Mon       Tonle Basak   
2           2          Phnom Penh        Mean Chey  Stueng Mean Chey   
3           3          Phnom Penh      Pur SenChey        Chaom Chau   
4           4          Phnom Penh        Mean Chey      Boeng Tumpun   

              h_id  price_per_m2  land_area      price   longitude   latitude  \
0  8865846a59fffff       2471.51         66  163119.66  104.967388  11.513522   
1  8865846ae1fffff       2234.87         80  178789.60  104.927932  11.557310   
2  8865846ad1fffff       2385.72         44  104971.68  104.901617  11.530884   
3  88658461a1fffff       2005.10         32   64163.20  104.877972  11.529098   
4  8865846ad5fffff       1048.19         40   41927.60  104.905288  11.536313   

   ...  f_road  f_secondary  f_service  f_steps  f_tertiary  f_track  f_trunk  \

In [9]:
# Show the combined table as this cell's output.
result

Unnamed: 0.1,Unnamed: 0,address_subdivision,address_locality,address_line_2,h_id,price_per_m2,land_area,price,longitude,latitude,...,f_road,f_secondary,f_service,f_steps,f_tertiary,f_track,f_trunk,f_trunk_link,f_unclassified,f_unused
0,0,Phnom Penh,Chbar Ampov,Preaek Pra,8865846a59fffff,2471.51,66,163119.66,104.967388,11.513522,...,0,1,1,1,1,1,1,1,1,0
1,1,Phnom Penh,Chamkar Mon,Tonle Basak,8865846ae1fffff,2234.87,80,178789.60,104.927932,11.557310,...,0,1,1,1,1,1,1,1,1,0
2,2,Phnom Penh,Mean Chey,Stueng Mean Chey,8865846ad1fffff,2385.72,44,104971.68,104.901617,11.530884,...,0,1,1,1,1,1,1,1,1,0
3,3,Phnom Penh,Pur SenChey,Chaom Chau,88658461a1fffff,2005.10,32,64163.20,104.877972,11.529098,...,0,1,1,1,1,1,1,1,1,0
4,4,Phnom Penh,Mean Chey,Boeng Tumpun,8865846ad5fffff,1048.19,40,41927.60,104.905288,11.536313,...,0,1,1,1,1,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,Phnom Penh,Chraoy Chongvar,Preaek Lieb,88658469d5fffff,3966.55,60,237993.00,104.913136,11.643104,...,0,1,1,1,1,1,1,1,1,0
9996,9996,Phnom Penh,Dangkao,Cheung Aek,8865846067fffff,1617.14,44,71154.16,104.917383,11.461337,...,0,1,1,1,1,1,1,1,1,0
9997,9997,Phnom Penh,Chraoy Chongvar,Bak Kaeng,886586a691fffff,734.85,40,29394.00,104.929070,11.704599,...,0,1,1,1,1,1,1,1,1,0
9998,9998,Phnom Penh,Dangkao,Preaek Kampues,8865846005fffff,341.92,82,28037.44,104.901623,11.455297,...,0,1,1,1,1,1,1,1,1,0


In [10]:
# Save the combined table as CSV, leaving the row index out of the file.
output_csv = '../../../data/processed/mockup_dataset_road.csv'
result.to_csv(output_csv, index=False)