In [3]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd

In [4]:
# Listings from the previous processing step (v3 file; its columns include
# landmark-distance flags such as Phnom_Penh_Airport_1_2km).
listings_csv = '../../data/processed/combined_data_v3_nearby_central_city.csv'
df = pd.read_csv(listings_csv)

In [5]:
# Preview the loaded listings; as the last expression of the cell the frame is rendered by the notebook.
df

Unnamed: 0,address_subdivision,address_locality,address_line_2,h_id,category_name,type,price_per_m2,land_area,price,longitude,...,Sisowath_Riverside_Park_nearest,Sisowath_Riverside_Park_1_2km,Sisowath_Riverside_Park_2_3km,Sisowath_Riverside_Park_3_5km,Sisowath_Riverside_Park_5_10km,Phnom_Penh_Airport_nearest,Phnom_Penh_Airport_1_2km,Phnom_Penh_Airport_2_3km,Phnom_Penh_Airport_3_5km,Phnom_Penh_Airport_5_10km
0,Phnom Penh,Meanchey,Chak Angrae Leu,8865846ac7fffff,Flat,residential,1000.000000,420.0,420000.00,104.913586,...,0,0,0,1,0,0,0,0,0,1
1,Phnom Penh,Por Sen Chey,Kakap,8865846f01fffff,Apartment,residential,804.597701,435.0,350000.00,104.842882,...,0,0,0,0,1,0,1,0,0,0
2,Phnom Penh,Sen Sok,Tuek Thla,8865846f69fffff,Apartment,residential,1971.326165,837.0,1650000.00,104.871316,...,0,0,0,0,1,0,0,0,1,0
3,Phnom Penh,Chamkarmon,BKK 3,8865846ac7fffff,Apartment,residential,6021.505376,465.0,2800000.00,104.913586,...,0,0,0,1,0,0,0,0,0,1
4,Phnom Penh,Dangkao,Dangkao,8865846183fffff,House,residential,927.835052,485.0,450000.00,104.856173,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8241,Phnom Penh,Chbar Ampov,Nirouth,8865846a0bfffff,Shophouse,residential,3388.890000,56.0,189777.84,104.959508,...,0,0,0,1,0,0,0,0,0,0
8242,Phnom Penh,Sen Sok,Phnom Penh Thmey,8865846f25fffff,Land/Development,residential,460.340000,96.0,44192.64,104.854256,...,0,0,0,0,1,0,0,0,0,1
8243,Phnom Penh,Sen Sok,Phnom Penh Thmey,886584688bfffff,Land/Development,residential,1872.660000,32.0,59925.12,104.877009,...,0,0,0,0,1,0,0,0,0,1
8244,Phnom Penh,Sen Sok,Krang Thnong,8865846f27fffff,House,residential,2000.000000,76.0,152000.00,104.844053,...,0,0,0,0,1,0,0,0,1,0


In [6]:
# Turn each listing's longitude/latitude pair into a shapely Point and wrap the
# frame in a GeoDataFrame. EPSG:4326 declares the coordinates as WGS 84 lon/lat.
def _listing_to_point(listing):
    """Return a Point with x=longitude and y=latitude for one listing row."""
    return Point(listing['longitude'], listing['latitude'])


# The column is written onto df itself, so df carries 'geometry' from here on.
df['geometry'] = df.apply(_listing_to_point, axis=1)
gdf_real_estate = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')


In [7]:
# Road layer of a shapefile bundle (presumably an OpenStreetMap extract for
# Cambodia, judging by the file names — confirm the data source).
# NOTE(review): absolute, machine-specific Windows path; anyone else running the
# notebook has to point this at their own copy of the shapefile.
roads_shapefile = r"D:\CADT\cambodia-latest-free.shp\gis_osm_roads_free_1.shp"
roads = gpd.read_file(roads_shapefile)


In [8]:
# Distances in EPSG:4326 would come out in degrees, so move both layers to a
# projected CRS measured in metres: EPSG:32648 (WGS 84 / UTM zone 48N, Cambodia).
metric_epsg = 32648
gdf_real_estate, roads = (
    layer.to_crs(epsg=metric_epsg) for layer in (gdf_real_estate, roads)
)


In [9]:
# Road classes (values of the roads layer's `fclass` column) that each get an
# f_<class> flag. The order of this list is the order of the flag columns.
road_types = [
    'bridleway',
    'corridor',
    'cycleway',
    'disused',
    'footway',
    'motorway',
    'path',
    'pedestrian',
    'primary',
    'residential',
    'road',
    'secondary',
    'service',
    'steps',
    'tertiary',
    'track',
    'trunk',
    'trunk_link',
    'unclassified',
    'unused',
]

In [None]:
# Function to flag which road types occur within a search radius (1000 by default)
def find_nearby_road_types(row, roads_gdf, distance=1000, road_type_names=None):
    """Flag which road classes occur within ``distance`` of one listing.

    Parameters
    ----------
    row : object with a ``geometry`` attribute
        One listing, e.g. a row handed over by ``GeoDataFrame.apply(..., axis=1)``.
    roads_gdf : GeoDataFrame
        Road features with an ``fclass`` column. It has to be in the same CRS as
        ``row.geometry``; ``distance`` is expressed in that CRS's units.
    distance : float, default 1000
        Search radius; a road counts as nearby when its distance is ``<= distance``.
    road_type_names : iterable of str, optional
        Road classes to flag. When omitted, the notebook-level ``road_types``
        list is used, so existing calls keep working unchanged.

    Returns
    -------
    pandas.Series
        One integer per road class, labelled ``f_<road class>``: 1 when at least
        one road of that class is within range, otherwise 0.
    """
    if road_type_names is None:
        # Backward-compatible default: the list defined in an earlier cell.
        road_type_names = road_types

    # NOTE: every call measures the distance from this listing to every feature
    # in roads_gdf, so the cost grows with listings x roads. For a large road
    # layer a spatial-index pre-filter or a spatial join may be worth adding.
    within_range = roads_gdf.distance(row.geometry) <= distance
    nearby = roads_gdf[within_range]

    # Collect the classes once instead of re-scanning the array per road type.
    classes_in_range = set(nearby['fclass'].values)
    return pd.Series(
        {f'f_{name}': int(name in classes_in_range) for name in road_type_names}
    )


In [16]:

# Compute the road-class flags for every listing, one row at a time, with a
# search radius of 1000 CRS units (metres in the projected layers) per point.
flags_df = gdf_real_estate.apply(
    lambda listing: find_nearby_road_types(listing, roads_gdf=roads, distance=1000),
    axis=1,
)

In [17]:
# Put the road flags next to the listing columns. Both frames get a fresh
# 0..n-1 index first, so rows are paired by position.
listing_part = df.reset_index(drop=True)
road_flag_part = flags_df.reset_index(drop=True)
result = pd.concat([listing_part, road_flag_part], axis=1)

In [26]:
# Per-h_id statistics of 'price_per_m2', computed on the rows that are kept.
#
# Listings below 100 per m2 are removed by the filter further down. Apply the
# same filter here first so those rows do not leak into the statistics when the
# notebook is run top to bottom; the later filter then finds nothing to remove.
result = result[result['price_per_m2'] >= 100]

stat_names = ['mean', 'median', 'max', 'min']
price_stats = result.groupby('h_id')['price_per_m2'].agg(stat_names).reset_index()

# Drop statistics left over from an earlier run of this cell so that re-running
# it replaces them instead of adding suffixed duplicates. (merge suffixes only
# rename colliding columns, so the new columns keep their plain names.)
result = result.drop(columns=stat_names, errors='ignore').merge(
    price_stats, on='h_id', how='left'
)

# `result` now carries 'mean', 'median', 'max' and 'min' of price_per_m2 for
# each row's h_id group; the column-on-column merge also resets the row index.

In [None]:
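# Disabled cell: uncomment to drop the geometry and per-h_id price statistic columns from `result`.
# result.drop(columns=['geometry', 'mean', 'median', 'max', 'min'], inplace=True)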

In [None]:
# Discard listings whose price_per_m2 is below 100 (rows with a missing price
# fail the comparison and are dropped as well).
min_price_per_m2 = 100
meets_minimum = result['price_per_m2'] >= min_price_per_m2
result = result.loc[meets_minimum]

In [27]:
# Inspect the final frame: listing columns, f_<road class> flags and the per-h_id price statistics.
result

Unnamed: 0,address_subdivision,address_locality,address_line_2,h_id,category_name,type,price_per_m2,land_area,price,longitude,...,f_tertiary,f_track,f_trunk,f_trunk_link,f_unclassified,f_unused,mean,median,max,min
0,Phnom Penh,Meanchey,Chak Angrae Leu,8865846ac7fffff,Flat,residential,1000.000000,420.0,420000.00,104.913586,...,1,0,1,1,0,0,2702.583583,2343.750000,6465.517241,110.000000
1,Phnom Penh,Por Sen Chey,Kakap,8865846f01fffff,Apartment,residential,804.597701,435.0,350000.00,104.842882,...,1,0,1,0,0,0,1235.634744,1235.633333,1666.670000,804.597701
2,Phnom Penh,Sen Sok,Tuek Thla,8865846f69fffff,Apartment,residential,1971.326165,837.0,1650000.00,104.871316,...,1,1,1,0,0,0,1779.287046,1764.710000,1971.330000,1600.000000
3,Phnom Penh,Chamkarmon,BKK 3,8865846ac7fffff,Apartment,residential,6021.505376,465.0,2800000.00,104.913586,...,1,0,1,1,0,0,2702.583583,2343.750000,6465.517241,110.000000
4,Phnom Penh,Dangkao,Dangkao,8865846183fffff,House,residential,927.835052,485.0,450000.00,104.856173,...,1,1,0,0,0,0,1408.401654,1379.598571,2941.176471,120.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8192,Phnom Penh,Chbar Ampov,Nirouth,8865846a0bfffff,Shophouse,residential,3388.890000,56.0,189777.84,104.959508,...,1,1,1,1,0,0,3388.889947,3388.890000,3388.890000,3388.888889
8193,Phnom Penh,Sen Sok,Phnom Penh Thmey,8865846f25fffff,Land/Development,residential,460.340000,96.0,44192.64,104.854256,...,1,1,0,0,1,0,460.339997,460.340000,460.340000,460.339943
8194,Phnom Penh,Sen Sok,Phnom Penh Thmey,886584688bfffff,Land/Development,residential,1872.660000,32.0,59925.12,104.877009,...,1,0,0,0,0,0,1872.659961,1872.660000,1872.660000,1872.659176
8195,Phnom Penh,Sen Sok,Krang Thnong,8865846f27fffff,House,residential,2000.000000,76.0,152000.00,104.844053,...,1,1,0,0,1,0,2000.000000,2000.000000,2000.000000,2000.000000


In [28]:
# Persist the enriched listings as the v4 dataset; index=False leaves the row
# index out of the file.
output_csv = '../../data/processed/combined_data_v4_nearby_road_1km.csv'
result.to_csv(output_csv, index=False)