# Geocoding HDB Property Info Dataset using OneMap API

In [25]:
# !pip install requests pandas tqdm  # `time` is part of the standard library and needs no install

In [26]:
#Import necessary libraries
import requests
import time
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()

In [27]:
#Define functions for API calling and geocoding
def call_loop(url_given, attempts=3, delay=5, timeout=30):
    """
    Call the given URL, retrying until it answers with HTTP status 200.

    One initial request is made, followed by up to ``attempts`` retries,
    sleeping ``delay`` seconds before each retry. A request that raises a
    ``requests`` exception (timeout, connection error, ...) counts as a
    failed attempt instead of aborting the caller.

    Args:
        url_given: Fully formed URL to GET.
        attempts: Maximum number of retries after the first request.
        delay: Seconds to sleep before each retry.
        timeout: Seconds to wait for the server before giving up on a
            single request.

    Returns:
        The response object of the first request that returned status 200,
        or ``None`` when every request failed.
    """
    last_status = None
    for attempt in range(attempts + 1):
        if attempt:
            # Only retries are delayed; the first request goes out at once.
            time.sleep(delay)
        try:
            response = requests.get(url_given, timeout=timeout)
        except requests.RequestException as exc:
            last_status = f"request failed ({exc})"
            continue
        # Check success right after the request so that a 200 on the very
        # last attempt is returned instead of being discarded.
        if response.status_code == 200:
            return response
        last_status = response.status_code
    print(f"For URL: {url_given}")
    print(f"Response Status Code: {last_status}")
    return None

def collect_results(full_add, attempts=3, delay=5):
    """
    Collect every search result for an address from the OneMap API.

    The first page is fetched through ``call_loop``; when the reply reports
    more than one page, pages 2..N are fetched the same way and their
    results appended. Pages that fail to download or to parse are skipped.

    Args:
        full_add: Address to search for, with spaces already replaced by
            '+'. Other URL-unsafe characters are percent-encoded here.
        attempts: Passed through to ``call_loop``.
        delay: Passed through to ``call_loop``.

    Returns:
        A list of result dictionaries; empty when there is no response,
        the response is not valid JSON, or nothing was found.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    url_p1 = "https://www.onemap.gov.sg/api/common/elastic/search?searchVal=+"
    url_p2 = "+&returnGeom=Y&getAddrDetails=Y"
    # '+' stays as the space separator and '%' is left alone so an already
    # percent-encoded address is not encoded twice; characters such as '&'
    # or '#' would otherwise cut the query string short.
    final_url = url_p1 + quote(full_add, safe="+%") + url_p2

    response = call_loop(final_url, attempts, delay)
    if response is None:
        print(f"Exited collect_results function as there is no response for address: {full_add}")
        return []

    try:
        feedback = response.json()
    except ValueError:
        # A 200 reply with a non-JSON body must not abort a long batch run.
        print(f"Exited collect_results function as the response is not valid JSON for address: {full_add}")
        return []
    if not isinstance(feedback, dict):
        return []

    # `or` also covers keys that are present but null.
    found = feedback.get('found') or 0
    totalpages = feedback.get('totalNumPages') or 1
    if found == 0:
        return []

    results = list(feedback.get('results') or [])
    if found > 1 and totalpages > 1:
        for page_num in range(2, totalpages + 1):
            url_with_page = final_url + f"&pageNum={page_num}"
            temp_response = call_loop(url_with_page, attempts, delay)
            if temp_response is None:
                continue
            try:
                temp_feedback = temp_response.json()
            except ValueError:
                print(f"Skipped page {page_num} (invalid JSON) for address: {full_add}")
                continue
            if isinstance(temp_feedback, dict):
                results.extend(temp_feedback.get('results') or [])
    return results

# Load the raw records to geocode; get_geocode_info below is applied to each row.
df = pd.read_csv(filepath_or_buffer='raw.csv')

def get_geocode_info(row):
    """
    Look up one row's address on OneMap and return its postal code,
    longitude and latitude as a three-element Series.

    The search string is the row's 'block' and 'street_name' joined by a
    space, with every space then turned into '+'. The first result returned
    by collect_results is used; each missing value, and every value when
    there are no results at all, is an empty string.
    """
    wanted_keys = ("POSTAL", "LONGITUDE", "LATITUDE")

    # Build the query from block and street name only; extend here if the
    # town or other parts should be included.
    address = "{} {}".format(row['block'], row['street_name'])
    query = "+".join(address.split(" "))

    matches = collect_results(query)
    if not matches:
        # Nothing came back for this address.
        return pd.Series([""] * len(wanted_keys))

    # The first entry is assumed to be the best match.
    best = matches[0]
    return pd.Series([best.get(key, "") for key in wanted_keys])

In [None]:
# Geocode every row with a progress bar, attach the three result columns,
# then write the enriched table to disk.
df[['postal_code', 'longitude', 'latitude']] = df.progress_apply(
    get_geocode_info,
    axis=1,
)
df.to_csv(path_or_buf='geocoded.csv', index=False)
print("Geocoding complete. Results saved to geocoded.csv")


100%|██████████| 199327/199327 [6:54:42<00:00,  8.01it/s]    


Geocoding complete. Results saved to geocoded.csv
