In [1]:
import requests
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from scipy.spatial import cKDTree
import os



In [2]:
df_resale = pd.read_csv('../data/raw/ResalePrice.csv')
df_resale['address'] = df_resale['block'] + ' ' + df_resale['street_name']

len(df_resale['address'].unique())


9624

### Get coordinates for each address

In [3]:
api_token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiI1M2M4NGU0YmJlMWVlZDhmMDczNDk4ODVmZDExYWRjOSIsImlzcyI6Imh0dHA6Ly9pbnRlcm5hbC1hbGItb20tcHJkZXppdC1pdC1uZXctMTYzMzc5OTU0Mi5hcC1zb3V0aGVhc3QtMS5lbGIuYW1hem9uYXdzLmNvbS9hcGkvdjIvdXNlci9wYXNzd29yZCIsImlhdCI6MTc0Mzc1ODAyNSwiZXhwIjoxNzQ0MDE3MjI1LCJuYmYiOjE3NDM3NTgwMjUsImp0aSI6IkF6YWZjWGxDb2tNb0hmQ1AiLCJ1c2VyX2lkIjozNTA0LCJmb3JldmVyIjpmYWxzZX0.CMD124pML3xaJU45AklBASBYNmojp_wctKoRupiDkQ0"


In [4]:
df_resale

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price,address
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0,406 ANG MO KIO AVE 10
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0,108 ANG MO KIO AVE 4
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0,602 ANG MO KIO AVE 5
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0,465 ANG MO KIO AVE 10
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0,601 ANG MO KIO AVE 5
...,...,...,...,...,...,...,...,...,...,...,...,...
201055,2025-02,YISHUN,EXECUTIVE,364,YISHUN RING RD,01 TO 03,145.0,Maisonette,1988,62 years 03 months,860000.0,364 YISHUN RING RD
201056,2025-02,YISHUN,EXECUTIVE,328,YISHUN RING RD,01 TO 03,142.0,Apartment,1988,62 years 05 months,845000.0,328 YISHUN RING RD
201057,2025-02,YISHUN,EXECUTIVE,723,YISHUN ST 71,07 TO 09,146.0,Maisonette,1986,60 years 05 months,818888.0,723 YISHUN ST 71
201058,2025-02,YISHUN,EXECUTIVE,824,YISHUN ST 81,01 TO 03,145.0,Apartment,1987,61 years 10 months,868888.0,824 YISHUN ST 81


In [5]:
def get_coordinates(address, token):
    url = "https://www.onemap.gov.sg/api/common/elastic/search"
    params = {'searchVal': address, 'returnGeom': 'Y', 'getAddrDetails': 'Y', 'pageNum': 1}
    headers = {'Authorization': token}

    try:
        response = requests.get(url, headers=headers, params=params).json()
        if response['found'] > 0:
            result = response['results'][0]
            return result['LATITUDE'], result['LONGITUDE']
    except requests.exceptions.RequestException:
        pass
    return None


df_resale['latitude'] = None
df_resale['longitude'] = None


unique_addresses = df_resale['address'].unique()
address_coords = {}

for address in unique_addresses:
    coords = get_coordinates(address, api_token)
    if coords:
        address_coords[address] = coords

df_resale['latitude'] = df_resale['address'].map(lambda x: address_coords.get(x, (None, None))[0]).astype(float)
df_resale['longitude'] = df_resale['address'].map(lambda x: address_coords.get(x, (None, None))[1]).astype(float)

### Generate distance to MRT station
Using haversin formula

In [7]:

import numpy as np
from math import radians, cos, sin, sqrt, atan2

bus_stops = gpd.read_file("../data/raw/BusStopLocation_Nov2024/BusStop.shp")
MRT_stops = pd.read_csv("../data/raw/MRT Stations.csv")

def haversine(lat1, lon1, lat2, lon2):
    R = 6371000  # radius of Earth in meters
    phi1, phi2 = radians(lat1), radians(lat2)
    dphi = radians(lat2 - lat1)
    dlambda = radians(lon2 - lon1)
    a = sin(dphi/2)**2 + cos(phi1)*cos(phi2)*sin(dlambda/2)**2
    return 2*R*atan2(sqrt(a), sqrt(1 - a))

from pyproj import Transformer

transformer = Transformer.from_crs("EPSG:3414", "EPSG:4326", always_xy=True)

def svy21_to_wgs84(easting, northing):
    lon, lat = transformer.transform(easting, northing)
    return lat, lon


bus_stops['x_coord'] = bus_stops.geometry.apply(lambda geom: geom.x)
bus_stops['y_coord'] = bus_stops.geometry.apply(lambda geom: geom.y)
bus_stops['Latitude'], bus_stops['Longitude'] = zip(*bus_stops.apply(lambda row: svy21_to_wgs84(row['x_coord'], row['y_coord']), axis=1))

def calculate_nearest_stops(row, mrt_stops, bus_stops):
    hdb_lat, hdb_lon = row['latitude'], row['longitude']
    if pd.isnull(hdb_lat) or pd.isnull(hdb_lon):
        return pd.Series([None, None])

    mrt_distances = mrt_stops.apply(lambda x: haversine(hdb_lat, hdb_lon, x['Latitude'], x['Longitude']), axis=1)
    nearest_mrt_distance = mrt_distances.min()

    bus_distances = bus_stops.apply(lambda x: haversine(hdb_lat, hdb_lon, x['Latitude'], x['Longitude']), axis=1)
    nearest_bus_distance = bus_distances.min()

    return pd.Series([nearest_mrt_distance, nearest_bus_distance])


unique_addresses = df_resale[['latitude', 'longitude']].drop_duplicates().reset_index(drop=True)
unique_addresses[['nearest_mrt_distance', 'nearest_bus_distance']] = unique_addresses.apply(calculate_nearest_stops, axis=1, mrt_stops=MRT_stops, bus_stops=bus_stops)

address_to_nearest_stops = unique_addresses.set_index(['latitude', 'longitude'])[['nearest_mrt_distance', 'nearest_bus_distance']].to_dict('index')

df_resale['nearest_mrt_distance'] = df_resale.apply(lambda row: address_to_nearest_stops.get((row['latitude'], row['longitude']), {}).get('nearest_mrt_distance'), axis=1)
df_resale['nearest_bus_distance'] = df_resale.apply(lambda row: address_to_nearest_stops.get((row['latitude'], row['longitude']), {}).get('nearest_bus_distance'), axis=1)


df_resale[['nearest_mrt_distance', 'nearest_bus_distance']] = df_resale.apply(calculate_nearest_stops, axis=1, mrt_stops=MRT_stops, bus_stops=bus_stops)

KeyboardInterrupt: 

In [None]:
df_resale

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price,address,latitude,longitude,nearest_mrt_distance,nearest_bus_distance
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0,406 ANG MO KIO AVE 10,1.362005,103.853880,926.898193,97.969153
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0,108 ANG MO KIO AVE 4,1.370966,103.838202,197.441011,168.954939
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0,602 ANG MO KIO AVE 5,1.380709,103.835368,499.284921,138.097011
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0,465 ANG MO KIO AVE 10,1.366201,103.857201,880.424478,72.020174
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0,601 ANG MO KIO AVE 5,1.381041,103.835132,471.153365,123.963837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201055,2025-02,YISHUN,EXECUTIVE,364,YISHUN RING RD,01 TO 03,145.0,Maisonette,1988,62 years 03 months,860000.0,364 YISHUN RING RD,1.429046,103.844622,1069.887447,145.778587
201056,2025-02,YISHUN,EXECUTIVE,328,YISHUN RING RD,01 TO 03,142.0,Apartment,1988,62 years 05 months,845000.0,328 YISHUN RING RD,1.429780,103.843057,895.831038,241.983932
201057,2025-02,YISHUN,EXECUTIVE,723,YISHUN ST 71,07 TO 09,146.0,Maisonette,1986,60 years 05 months,818888.0,723 YISHUN ST 71,1.426020,103.829939,679.698412,167.649503
201058,2025-02,YISHUN,EXECUTIVE,824,YISHUN ST 81,01 TO 03,145.0,Apartment,1987,61 years 10 months,868888.0,824 YISHUN ST 81,1.413745,103.833303,406.125677,153.812977


In [None]:
df_resale.to_csv('../data/cleaned/resale_price_cleaned_0.csv', index=False)

In [None]:
geospatial_df = df_resale[['street_name', 'latitude', 'longitude', 'nearest_mrt_distance', 'nearest_bus_distance']].drop_duplicates()
geospatial_df

Unnamed: 0,street_name,latitude,longitude,nearest_mrt_distance,nearest_bus_distance
0,ANG MO KIO AVE 10,1.362005,103.853880,926.898193,97.969153
1,ANG MO KIO AVE 4,1.370966,103.838202,197.441011,168.954939
2,ANG MO KIO AVE 5,1.380709,103.835368,499.284921,138.097011
3,ANG MO KIO AVE 10,1.366201,103.857201,880.424478,72.020174
4,ANG MO KIO AVE 5,1.381041,103.835132,471.153365,123.963837
...,...,...,...,...,...
199925,BUANGKOK LINK,1.385772,103.883485,476.366915,166.405416
199926,BUANGKOK LINK,1.384839,103.883129,585.219209,90.385984
200495,ANCHORVALE LANE,1.393436,103.884812,468.803169,213.588820
200497,ANCHORVALE LANE,1.393907,103.884507,528.251609,222.585947


In [None]:
#geospatial_df.to_csv('../data/cleaned/hdb_geospatial.csv', index=False)