In [53]:
import json
import os
from math import radians, cos, sin, sqrt, atan2

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import Point, shape

In [42]:
import zipfile

# Load the cleaned resale data from the first member of the zip archive.
with zipfile.ZipFile("../data/cleaned/resale_price_clean_final.csv.zip", 'r') as archive:
    with archive.open(archive.namelist()[0]) as csv_stream:
        df_resale = pd.read_csv(csv_stream)

In [47]:
# Remember every column's observed minimum and maximum; normalize_column()
# reads these bounds later to min-max scale model inputs.
min_max_dict = {
    name: {"min": series.min(), "max": series.max()}
    for name, series in df_resale.items()
}


In [None]:
# Read the OneMap API token from the environment instead of committing it to the
# notebook. Export ONEMAP_TOKEN before starting the kernel; the value is an empty
# string when the variable is unset.
# NOTE(review): a token used to be hard-coded on this line — treat it as leaked
# and make sure it has been revoked.
ONEMAP_TOKEN = os.environ.get("ONEMAP_TOKEN", "")

In [None]:
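# # Planning area: one-off download of the planning-area polygons, kept for reference.
# # Its output is cached in ../data/raw/planning_area.json.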
# def load_planning_area_polygons():
#     url = "https://www.onemap.gov.sg/api/public/popapi/getAllPlanningarea?year=2024"
#     headers = {"Authorization": ONEMAP_TOKEN}
#     response = requests.get(url,headers=headers)
#     return response.json()

# planning_area = load_planning_area_polygons()
# with open('../data/raw/planning_area.json', 'w') as f:
#     json.dump(planning_area, f, indent=2)


In [36]:
# Read the cached planning-area response and keep only its 'SearchResults' entries.
with open('../data/raw/planning_area.json') as planning_file:
    planning_areas = json.loads(planning_file.read())['SearchResults']

In [54]:
def get_coordinates_from_postal(postal_code):
    """Look up the latitude/longitude of a postal code via the OneMap search API.

    Parameters
    ----------
    postal_code : str or int
        Value sent as the ``searchVal`` query parameter.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` as floats taken from the first search result,
        or ``(None, None)`` when the response reports no match.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    requests.RequestException
        On connection problems or when the request times out.
    """
    url = "https://www.onemap.gov.sg/api/common/elastic/search"
    # Let requests build the query string so the search value is URL-encoded.
    params = {
        "searchVal": postal_code,
        "returnGeom": "Y",
        "getAddrDetails": "Y",
        "pageNum": 1,
    }
    headers = {"Authorization": ONEMAP_TOKEN}
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.raise_for_status()
    data = response.json()

    # Payloads without "found"/"results" are treated as "no match" rather than
    # surfacing as a KeyError.
    results = data.get("results") or []
    if data.get("found", 0) > 0 and results:
        first = results[0]
        return float(first["LATITUDE"]), float(first["LONGITUDE"])
    return None, None


def get_planning_area_from_point(lat, lon, planning_areas):
    """Return the name of the first planning area whose polygon contains the point.

    Each entry of ``planning_areas`` must provide a ``'geojson'`` string and a
    ``'pln_area_n'`` name. Entries are checked in order, and ``None`` is
    returned when no polygon contains the location.
    """
    # The point is built as (x, y) = (lon, lat).
    location = Point(lon, lat)
    matching_names = (
        entry['pln_area_n']
        for entry in planning_areas
        if shape(json.loads(entry['geojson'])).contains(location)
    )
    return next(matching_names, None)

def get_planning_area_from_postal(postal_code):
    """Resolve a postal code to the name of its planning area.

    Returns the string "Invalid postal code" when the postal code cannot be
    geocoded. Otherwise returns whatever ``get_planning_area_from_point``
    yields for the resolved coordinates against the module-level
    ``planning_areas`` (which may be ``None``).
    """
    latitude, longitude = get_coordinates_from_postal(postal_code)
    geocoded = latitude is not None and longitude is not None
    if geocoded:
        return get_planning_area_from_point(latitude, longitude, planning_areas)
    return "Invalid postal code"

In [None]:
# Transit reference data: MRT stations from a CSV, bus stops from the BusStop shapefile.
MRT_stops = pd.read_csv("../data/raw/MRT Stations.csv")
bus_stops = gpd.read_file("../data/raw/BusStopLocation_Nov2024/BusStop.shp")

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in meters between two points given in decimal degrees.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6,371,000 m.
    """
    R = 6371000  # radius of Earth in meters
    phi1, phi2 = radians(lat1), radians(lat2)
    dphi = radians(lat2 - lat1)
    dlambda = radians(lon2 - lon1)
    a = sin(dphi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlambda / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1], which
    # would make sqrt(1 - a) raise ValueError; clamp it first.
    a = min(1.0, max(0.0, a))
    return 2 * R * atan2(sqrt(a), sqrt(1 - a))

from pyproj import Transformer

# Shared coordinate transformer from EPSG:3414 to EPSG:4326. always_xy=True
# fixes the axis order to (x, y): easting/northing in, longitude/latitude out.
transformer = Transformer.from_crs(crs_from="EPSG:3414", crs_to="EPSG:4326", always_xy=True)

def svy21_to_wgs84(easting, northing):
    """Convert easting/northing to ``(latitude, longitude)``.

    The module-level ``transformer`` returns coordinates in (lon, lat) order;
    this wrapper swaps them so callers receive latitude first.
    """
    longitude, latitude = transformer.transform(easting, northing)
    return latitude, longitude


# Expose each bus stop's raw point coordinates as plain columns, using the
# vectorized GeoSeries accessors instead of a Python lambda per row.
bus_stops['x_coord'] = bus_stops.geometry.x
bus_stops['y_coord'] = bus_stops.geometry.y

# Convert every stop in one call: svy21_to_wgs84 hands its arguments straight
# to the pyproj transformer, which accepts whole arrays.
bus_stops['Latitude'], bus_stops['Longitude'] = svy21_to_wgs84(
    bus_stops['x_coord'].to_numpy(), bus_stops['y_coord'].to_numpy()
)

# NOTE: a second, identical definition of haversine() used to follow here; it
# was removed because the function is already defined earlier in this cell.

def _nearest_distance(lat, lon, stops):
    """Smallest haversine distance from (lat, lon) to any row of ``stops``."""
    distances = pd.Series(
        [
            haversine(lat, lon, stop_lat, stop_lon)
            for stop_lat, stop_lon in zip(stops['Latitude'], stops['Longitude'])
        ],
        dtype=float,
    )
    # Series.min() skips NaN and yields NaN for an empty table.
    return distances.min()


def get_nearest_distances(lat, lon):
    """Return the distance to the nearest MRT station and the nearest bus stop.

    Parameters
    ----------
    lat, lon : float
        Query location in decimal degrees.

    Returns
    -------
    tuple
        ``(nearest_mrt_distance, nearest_bus_distance)``, each the minimum of
        ``haversine`` over the 'Latitude'/'Longitude' columns of the
        module-level ``MRT_stops`` and ``bus_stops`` tables.

    Notes
    -----
    The lookup tables are only read. Distances are computed into a temporary
    Series, so repeated calls no longer write a 'distance' column into the
    shared frames.
    """
    nearest_mrt_distance = _nearest_distance(lat, lon, MRT_stops)
    nearest_bus_distance = _nearest_distance(lat, lon, bus_stops)
    return nearest_mrt_distance, nearest_bus_distance



In [None]:
def normalize_column(value, col):
    """Min-max scale ``value`` using the bounds recorded for ``col``.

    The bounds come from ``min_max_dict[col]``. A column whose minimum equals
    its maximum has no spread, so 0 is returned for it instead of dividing by
    zero.
    """
    bounds = min_max_dict[col]
    lower, upper = bounds["min"], bounds["max"]
    return 0 if upper == lower else (value - lower) / (upper - lower)


In [None]:
# Columns the original draft listed without a value. There is no lookup for
# them in this notebook yet, so the caller has to supply them.
_SCORE_FEATURES = (
    'education_score',
    'shopping_score',
    'food_score',
    'recreation_score',
    'healthcare_score',
)
# NOTE(review): the draft listed 'priv_prop' twice (the second marked "??").
# A dict keeps only one entry per key, so it appears once here; confirm against
# the training columns whether the second entry was meant to be another column.
_AREA_FEATURES = (
    'avg_household_income',
    'NoReligion',
    'Buddhism',
    'Taoism1',
    'Islam',
    'Hinduism',
    'Sikhism',
    'Christianity_Catholic',
    'Christianity_OtherChristians',
    'OtherReligions',
    'Chinese',
    'Malays',
    'Indians',
    'Others',
    'priv_prop',
)


def price_predict(storey_range, floor_area, remaining_lease, postal_code, model,
                  extra_features=None):
    """Build a one-row feature frame for a flat and return ``model.predict`` on it.

    Parameters
    ----------
    storey_range, floor_area, remaining_lease
        Flat attributes, stored under 'storey_range', 'floor_area_sqm' and
        'remaining_lease'.
    postal_code
        Postal code to geocode. The coordinates provide 'lat'/'lon', the
        planning area ('town') and the nearest MRT / bus-stop distances.
    model
        Object exposing ``predict(DataFrame)``.
    extra_features : dict, optional
        Values for every name in ``_SCORE_FEATURES`` and ``_AREA_FEATURES``.
        TODO: replace with a real lookup once the source of these values is
        decided.

    Returns
    -------
    Whatever ``model.predict`` returns for the single-row frame.

    Raises
    ------
    ValueError
        If the postal code cannot be geocoded, or if any required entry of
        ``extra_features`` is missing.
    """
    # Geocode once and reuse the result for the town lookup and the distances.
    lat, lon = get_coordinates_from_postal(postal_code)
    if lat is None or lon is None:
        raise ValueError(f"Could not geocode postal code {postal_code!r}")
    town = get_planning_area_from_point(lat, lon, planning_areas)
    nearest_mrt_distance, nearest_bus_distance = get_nearest_distances(lat, lon)

    supplied = dict(extra_features or {})
    missing = [
        name for name in _SCORE_FEATURES + _AREA_FEATURES if name not in supplied
    ]
    if missing:
        raise ValueError(
            "price_predict needs values for these features via extra_features: "
            + ", ".join(missing)
        )

    # Key order follows the draft's column order.
    features = {
        'town': town,
        'storey_range': storey_range,
        'floor_area_sqm': floor_area,
        'remaining_lease': remaining_lease,
        'lat': lat,
        'lon': lon,
        'nearest_mrt_distance': nearest_mrt_distance,
        'nearest_bus_distance': nearest_bus_distance,
        **{name: supplied[name] for name in _SCORE_FEATURES},

        # Based on 2024-12 data
        'inflation_rate (x100)': 0.3468,
        'resident_unemployment_rate': 2.0,
        'interest_rate': 2.1123,
        'fx_rate': 1.3503,

        **{name: supplied[name] for name in _AREA_FEATURES},
        'year': 2025,
        'month_num': 4,
    }
    X_input = pd.DataFrame([features])

    # Min-max scale every column that has recorded bounds:
    # (original value - min) / (max - min).
    for col in X_input.columns:
        if col in min_max_dict:
            X_input[col] = X_input[col].apply(
                lambda value, col=col: normalize_column(value, col)
            )

    return model.predict(X_input)

