In [1]:
import requests
import time
import pandas as pd
from collections import deque
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
import langid
import re

In [2]:
# List of Cambodian provinces to iterate through
cambodia_provinces = [
    "Phnom Penh",           # Capital (Autonomous Municipality)
    "Banteay Meanchey",
    "Battambang",
    "Kampong Cham",
    "Kampong Chhnang",
    "Kampong Speu",
    "Kampong Thom",
    "Kampot",
    "Kandal",
    "Kep",
    "Koh Kong",
    "Kratié",
    "Mondulkiri",
    "Oddar Meanchey",
    "Pailin",
    "Preah Vihear",
    "Prey Veng",
    "Pursat",
    "Ratanakiri",
    "Siem Reap",
    "Sihanoukville",
    "Stung Treng",
    "Svay Rieng",
    "Takeo",
    "Tboung Khmum"
]


In [3]:
# ——— CONFIGURATION SETTINGS ———
PAGE_SIZE   = 1000    # Number of listings returned per API call
MIN_STEP    = 0.005   # Minimum geographical step for splitting tiles (prevents infinite loops on tiny areas)
EXPECTED    = None    # Set to an int to stop early (e.g., 5000), or None to exhaustively crawl
USER_AGENT  = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
               "AppleWebKit/537.36 (KHTML, like Gecko) "
               "Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.60")

# Geographic bounds for Cambodia (used for initial grid crawl)
TOP_LAT     = 14.704581
BOTTOM_LAT  = 9.913701
LEFT_LON    = 102.313423
RIGHT_LON   = 107.627449

headers = {"User-Agent": USER_AGENT}

In [4]:

def fetch_tile(tl_lat, tl_lon, br_lat, br_lon, property_type, province_name):
    """
    Fetches map points (listings) for a given geographical tile and property type from the API.

    Args:
        tl_lat (float): Top-left latitude of the bounding box.
        tl_lon (float): Top-left longitude of the bounding box.
        br_lat (float): Bottom-right latitude of the bounding box.
        br_lon (float): Bottom-right longitude of the bounding box.
        property_type (str): Type of property (e.g., 'residential', 'commercial', 'borey', 'project').
        province_name (str): Name of the province for the location query (URL encoded).

    Returns:
        list: A list of sample points (listings) found within the tile. Returns an empty list on error.
    """
    # Construct the URL for the API request with dynamic parameters
    url = (
        "https://www.realestate.com.kh/api/listing/map-points/"
        f"?active_tab=popularLocations"
        f"&order_by=relevance"
        f"&property_type={property_type}"
        f"&q=location:{province_name}" # Use the specific province name in the query
        f"&search_type=sale"
        f"&bottom_right_lat={br_lat}&bottom_right_lon={br_lon}"
        f"&top_left_lat={tl_lat}&top_left_lon={tl_lon}"
        f"&show_all=true&page_size={PAGE_SIZE}"
    )
    try:
        # Make the GET request with a timeout
        resp = requests.get(url, headers=headers, timeout=15)
        resp.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)

        # Parse the JSON response and extract 'points' and 'samples'
        pts = resp.json().get("points", [])
        if not pts:
            return []
        return pts[0].get("samples", [])
    except requests.exceptions.RequestException as e:
        print(f"⚠️ HTTP Request Error for tile {tl_lat,tl_lon,br_lat,br_lon}: {e}")
        return []
    except Exception as e:
        print(f"⚠️ An unexpected error occurred in fetch_tile: {e}")
        return []


In [5]:
request_province_list = [] # Stores counts per province and property type
request_all = []  

In [6]:
url_base_part1 = "https://www.realestate.com.kh/api/listing/map-points/?active_tab=popularLocations&order_by=relevance&property_type="
url_base_part2 = '&q=location:'
url_base_part3 = '&search_type=sale&bottom_right_lat=9.039849866109236&bottom_right_lon=108.07048795635677&top_left_lat=15.785344977938763&top_left_lon=100.34079574208647&order_by=relevance&show_all=true&page_size=1000'

In [7]:
seen_ids = set()


In [8]:
for i in range(len(cambodia_provinces)):
    for j in ['residential', 'commercial', 'borey', 'project']:
        province = cambodia_provinces[i]
        # Construct the initial URL for the province and property type, URL encoding the province name
        url_initial_query = url_base_part1 + j + url_base_part2 + province.replace(" ", "%20") + url_base_part3

        # Make the initial request for the current province and property type
        try:
            response = requests.get(url_initial_query, headers=headers, timeout=15)
            response.raise_for_status() # Raise HTTPError for bad responses
            data = response.json().get("points", [])
            count = data[0]['count'] if data else 0
            samples = data[0].get('samples', []) if data else []
        except requests.exceptions.RequestException as e:
            print(f"⚠️ Initial Request Error for {province} ({j}): {e}")
            count = 0
            samples = []
        except Exception as e:
            print(f"⚠️ An unexpected error occurred during initial request for {province} ({j}): {e}")
            count = 0
            samples = []

        # Record the count for the current province and property type
        request_province_list.append({
            "Province": province,
            "Property_Type": j, # Added property type for better tracking
            "Count":    count
        })

        # If the count is less than the PAGE_SIZE, all listings for this query are returned
        if count < PAGE_SIZE:
            # Add these samples to the main list, ensuring no duplicates
            for item in samples:
                _id = item.get("id")
                if _id and _id not in seen_ids:
                    seen_ids.add(_id)
                    item['type'] = j
                    request_all.append(item)
        else:
            # If the count is at or above PAGE_SIZE, it means there are more listings.
            # Include the initial 1000 samples before performing a detailed grid crawl.
            for item in samples:
                _id = item.get("id")
                if _id and _id not in seen_ids:
                    seen_ids.add(_id)
                    item['type'] = j
                    request_all.append(item)

            # Perform a grid crawl over the entire Cambodia bounds to capture all listings
            print(f"🔍 {province} ({j}) has {count} listings → drilling down…")
            queue = deque([(TOP_LAT, LEFT_LON, BOTTOM_LAT, RIGHT_LON)]) # Initialize queue with full Cambodia bounds
            tiles_processed = 0

            while queue:
                tl_lat, tl_lon, br_lat, br_lon = queue.popleft()
                tiles_processed += 1

                # Fetch listings for the current tile, passing property_type and province_name
                results = fetch_tile(tl_lat, tl_lon, br_lat, br_lon, j, province.replace(" ", "%20"))
                n = len(results)

                # Print progress for the current tile
                print(f"[{province}][{j}][Tile {tiles_processed}] Fetched {n} | "
                      f"Queue: {len(queue)} | Collected: {len(request_all)}")

                lat_span = tl_lat - br_lat
                lon_span = br_lon - tl_lon

                # If the current tile still returns PAGE_SIZE listings and is larger than MIN_STEP,
                # split it into 4 sub-tiles and add them to the queue for further drilling down.
                if n == PAGE_SIZE and lat_span > MIN_STEP and lon_span > MIN_STEP:
                    mid_lat = (tl_lat + br_lat) / 2
                    mid_lon = (tl_lon + br_lon) / 2
                    queue.extend([
                        (tl_lat,    tl_lon,    mid_lat, mid_lon),   # North-West tile
                        (tl_lat,    mid_lon,   mid_lat, br_lon),   # North-East tile
                        (mid_lat,   tl_lon,    br_lat,  mid_lon),   # South-West tile
                        (mid_lat,   mid_lon,   br_lat,  br_lon),    # South-East tile
                    ])
                else:
                    # If the tile is no longer capped or is too small to split, accept its results.
                    # Add unique listings to the main list.
                    for item in results:
                        _id = item.get("id")
                        if _id and _id not in seen_ids:
                            seen_ids.add(_id)
                            item['type'] = j
                            request_all.append(item)

                # If EXPECTED count is set and reached, break the loop early
                if EXPECTED and len(request_all) >= EXPECTED:
                    break

                time.sleep(0.4) # Add a small delay to avoid overwhelming the server

        print(f"✅ After {province} ({j}): total unique = {len(request_all)}\n")

🔍 Phnom Penh (residential) has 3894 listings → drilling down…
[Phnom Penh][residential][Tile 1] Fetched 1000 | Queue: 0 | Collected: 1000
[Phnom Penh][residential][Tile 2] Fetched 2 | Queue: 3 | Collected: 1000
[Phnom Penh][residential][Tile 3] Fetched 0 | Queue: 2 | Collected: 1001
[Phnom Penh][residential][Tile 4] Fetched 1000 | Queue: 1 | Collected: 1001
[Phnom Penh][residential][Tile 5] Fetched 30 | Queue: 4 | Collected: 1001
[Phnom Penh][residential][Tile 6] Fetched 0 | Queue: 3 | Collected: 1029
[Phnom Penh][residential][Tile 7] Fetched 1000 | Queue: 2 | Collected: 1029
[Phnom Penh][residential][Tile 8] Fetched 0 | Queue: 5 | Collected: 1029
[Phnom Penh][residential][Tile 9] Fetched 0 | Queue: 4 | Collected: 1029
[Phnom Penh][residential][Tile 10] Fetched 0 | Queue: 3 | Collected: 1029
[Phnom Penh][residential][Tile 11] Fetched 4 | Queue: 2 | Collected: 1029
[Phnom Penh][residential][Tile 12] Fetched 0 | Queue: 1 | Collected: 1031
[Phnom Penh][residential][Tile 13] Fetched 1000 |

In [9]:
df_province_list = pd.DataFrame(request_province_list)
df_all = pd.DataFrame(request_all)

In [10]:
# ——— FINAL DEDUPLICATION AND SUMMARY ———
if 'id' in df_all.columns:
    before = len(df_all)
    df_all = df_all.drop_duplicates(subset='id') # Remove duplicate listings based on 'id'
    after = len(df_all)
    print(f"🔄 Final dedupe: {before} → {after} unique listings")


🔄 Final dedupe: 6369 → 6369 unique listings


In [11]:

# Convert 'Count' column to integer type and sum for overall province counts
df_province_list['Count'] = df_province_list['Count'].astype(int)
print(f"Total listings counted across all initial API queries: {df_province_list['Count'].sum()}")
df_province_summary = df_province_list.groupby('Province')['Count'].sum().reset_index()
print("Summary of listings per province (initial query counts):")
print(df_province_summary)

Total listings counted across all initial API queries: 6496
Summary of listings per province (initial query counts):
            Province  Count
0   Banteay Meanchey     11
1         Battambang     20
2       Kampong Cham     15
3    Kampong Chhnang      7
4       Kampong Speu     56
5       Kampong Thom     10
6             Kampot    100
7             Kandal    229
8                Kep     35
9           Koh Kong     21
10            Kratié      2
11        Mondulkiri      1
12    Oddar Meanchey      2
13            Pailin      3
14        Phnom Penh   4316
15      Preah Vihear      3
16         Prey Veng      4
17            Pursat      7
18        Ratanakiri      3
19         Siem Reap   1460
20     Sihanoukville    166
21       Stung Treng      3
22        Svay Rieng      3
23             Takeo     15
24      Tboung Khmum      4


In [12]:

# Base URL for individual property listings
base_listing_url = "https://www.realestate.com.kh/"

# Create new columns if they don't already exist in df_all
if 'information' not in df_all.columns:
    df_all['information'] = None
if 'source_url' not in df_all.columns: # New column to store the URL of the scraped page
    df_all['source_url'] = None

# Define the scraping function for individual listing pages
def fetch_info(index, id_value):
    """
    Fetches detailed information and the source URL for a single property listing.

    Args:
        index (int): The index of the row in the DataFrame being processed.
        id_value (str/int): The unique ID of the property listing.

    Returns:
        tuple: (index, raw_text, temp_url) if successful, or (index, None, temp_url) if an error occurs
               or the target span is not found. The URL is always returned.
    """
    # Construct the full URL for the specific listing page
    temp_url = base_listing_url + str(id_value) + '/'
    try:
        # Make the GET request with a timeout
        response = requests.get(temp_url, timeout=10)
        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, "html.parser")

        # Find the specific <span> element containing the desired information
        span = soup.find("span", class_="css-zrj3zm")
        if span:
            # Extract text from the span, cleaning up whitespace
            raw_text = span.get_text(separator=" ", strip=True)
            print(f"[{index}] ✔ Success: {temp_url}")
            return index, raw_text, temp_url # Return the URL along with text
        else:
            print(f"[{index}] ✘ No span found: {temp_url}")
            return index, None, temp_url # Return the URL even if span not found
    except requests.exceptions.RequestException as e:
        print(f"[{index}] ⚠ HTTP Request Error for {temp_url}: {e}")
        return index, None, temp_url # Return the URL on error
    except Exception as e:
        print(f"[{index}] ⚠ Unexpected Error for {temp_url}: {e}")
        return index, None, temp_url # Return the URL on error

# Use ThreadPoolExecutor for parallel execution to speed up scraping
with ThreadPoolExecutor(max_workers=25) as executor:
    # Submit scraping tasks for each listing in df_all
    futures = [
        executor.submit(fetch_info, i, df_all.iloc[i]['id']) for i in range(len(df_all))
    ]

    # Process results as they complete
    for future in as_completed(futures):
        index, result_text, result_url = future.result() # Unpack the returned values
        df_all.at[index, 'information'] = result_text
        df_all.at[index, 'source_url'] = result_url # Assign the URL to the new column


[2] ✔ Success: https://www.realestate.com.kh/212595/
[4] ✔ Success: https://www.realestate.com.kh/212653/
[12] ✔ Success: https://www.realestate.com.kh/141245/
[10] ✔ Success: https://www.realestate.com.kh/229653/
[13] ✔ Success: https://www.realestate.com.kh/145091/
[9] ✔ Success: https://www.realestate.com.kh/127135/
[18] ✔ Success: https://www.realestate.com.kh/233750/
[15] ✔ Success: https://www.realestate.com.kh/156169/
[7] ✔ Success: https://www.realestate.com.kh/218563/
[1] ✔ Success: https://www.realestate.com.kh/211997/
[22] ✔ Success: https://www.realestate.com.kh/235065/
[11] ✔ Success: https://www.realestate.com.kh/142378/
[6] ✔ Success: https://www.realestate.com.kh/218884/
[8] ✔ Success: https://www.realestate.com.kh/218736/
[21] ✔ Success: https://www.realestate.com.kh/235048/
[14] ✔ Success: https://www.realestate.com.kh/153997/
[5] ✔ Success: https://www.realestate.com.kh/217976/
[20] ✔ Success: https://www.realestate.com.kh/231183/
[24] ✔ Success: https://www.realesta

In [13]:
# Add a new column for language detection
df_all['language'] = None

# Iterate through the DataFrame to classify the language of the 'information' text
for i in range(len(df_all)):
    text = df_all.loc[i, 'information']
    if text: # Only classify if 'information' text is not None or empty
        lang, confidence = langid.classify(str(text))
        df_all.loc[i, 'language'] = lang
    else:
        df_all.loc[i, 'language'] = 'unknown' # Assign 'unknown' if no information was scraped


In [14]:
df_all.loc[~df_all['language'].isin(['en', 'km', 'ja', 'zh']), 'language'] = 'en' # Default to English for unrecognized languages
df_all.loc[df_all['language'].isin(['zh']), 'language'] = 'zh-CN' # Standardize Chinese to zh-CN

print("\nDataFrame after language detection and URL addition (first 5 rows):")
print(df_all.head()) # Print head to show new 'source_url' and 'language' columns



DataFrame after language detection and URL addition (first 5 rows):
       id                                           headline price_display  \
0  204942  A flat (2 floors) near Hengly market and near ...      $150,000   
1  211997  Twin Villa (Twin Villa) in Borey Highland 2005...      $269,000   
2  212595                                     house for sale       $85,000   
3  211987                    House for sale in Meanchey Area      $450,000   
4  212653  House for Sale Urgently | Extra Space and Stai...       $85,000   

  rent_display  bedrooms  bathrooms  land_area  \
0                    6.0        4.0        NaN   
1                    4.0        7.0        NaN   
2                    2.0        2.0        NaN   
3                    8.0        6.0        0.0   
4                    2.0        2.0        NaN   

                                       thumbnail_url  \
0  https://images.realestate.com.kh/__sized__/lis...   
1  https://images.realestate.com.kh/__sized__/lis

In [15]:
df_final_amount = df_all.groupby('address_subdivision').size().reset_index(name='Scraped_Count')


In [16]:
df_final_amount = pd.merge(
    df_province_summary,
    df_final_amount,
    left_on='Province',
    right_on='address_subdivision',
    how='left'
)
# Rename 'Count' from df_province_summary to 'API_Count' for clarity
df_final_amount = df_final_amount.rename(columns={'Count': 'API_Count'})
df_final_amount = df_final_amount.drop(columns=['address_subdivision']) # Drop duplicate column used for merging

print("\nFinal aggregated DataFrame (comparison of API counts vs. scraped counts):")
print(df_final_amount)



Final aggregated DataFrame (comparison of API counts vs. scraped counts):
            Province  API_Count  Scraped_Count
0   Banteay Meanchey         11           10.0
1         Battambang         20           19.0
2       Kampong Cham         15           14.0
3    Kampong Chhnang          7            7.0
4       Kampong Speu         56           56.0
5       Kampong Thom         10            9.0
6             Kampot        100          100.0
7             Kandal        229          225.0
8                Kep         35           33.0
9           Koh Kong         21           21.0
10            Kratié          2            NaN
11        Mondulkiri          1            1.0
12    Oddar Meanchey          2            2.0
13            Pailin          3            3.0
14        Phnom Penh       4316         4210.0
15      Preah Vihear          3            3.0
16         Prey Veng          4            4.0
17            Pursat          7            7.0
18        Ratanakiri          3 

In [17]:
df_all


Unnamed: 0,id,headline,price_display,rent_display,bedrooms,bathrooms,land_area,thumbnail_url,thumbnail_urls,garages,...,address_subdivision,address_locality,address_line_2,address_line_1,category_name,is_parent,type,information,source_url,language
0,204942,A flat (2 floors) near Hengly market and near ...,"$150,000",,6.0,4.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Meanchey,Stueng Mean chey,,Flat,False,residential,A flat (2 floors) near Hengly market and near ...,https://www.realestate.com.kh/204942/,en
1,211997,Twin Villa (Twin Villa) in Borey Highland 2005...,"$269,000",,4.0,7.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Sen Sok,Khmuonh,,Twin Villa,False,residential,Twin Villa (Twin Villa) in Borey Highland 2005...,https://www.realestate.com.kh/211997/,en
2,212595,house for sale,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Chamkarmon,BKK 2,117 117,House,False,residential,មាន3ជាន់ 2បន្ទប់ទឹក បន្ទប់គេង2 អាចដាក់ម៉ូតូបាន...,https://www.realestate.com.kh/212595/,km
3,211987,House for sale in Meanchey Area,"$450,000",,8.0,6.0,0.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,3.0,...,Phnom Penh,Meanchey,Stueng Mean chey 3,1 ផ្លូវលូប្រាំ(82c),Flat,False,residential,"I have a business house, I want to sell a hous...",https://www.realestate.com.kh/211987/,en
4,212653,House for Sale Urgently | Extra Space and Stai...,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Meanchey,Boeung Tumpun,"ST. 45BT #4C, ST. 45BT #4C,",Flat,False,residential,ផ្ទះល្វែងលក់បន្ទាន់ 4m * 15.5m មានជណ្ដើរកៀន ចង...,https://www.realestate.com.kh/212653/,km
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,246364,Land in Phnom Tamao | Selling 60% below market...,"$1,440,000",,,,80000.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Takeo,Bati,Kandoeng,,Land,False,commercial,Land in Phnom Tamao | Selling 60% below market...,https://www.realestate.com.kh/246364/,en
6365,217364,ផ្ទះសំណាក់លក់បន្ទាន់,POA,,18.0,18.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,1.0,...,Tboung Khmum,Ponhea Kraek,Kraek,72 St 72,House,False,residential,លក់ផ្ទះសំណាក់បន្ទាន់ តម្លៃសមរម្យដែលអាចចរចាបាន ...,https://www.realestate.com.kh/217364/,km
6366,231535,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale,"$100,000/m²",,,,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Tboung Khmum,Ponhea Kraek,Trapeang Phlong,,Land/Development,False,residential,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale ទីតា...,https://www.realestate.com.kh/231535/,km
6367,246457,Land For Sale,"$147,000",,,,22317.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Tboung Khmum,Tboung Khmum,Roka Po Pram,72 Pel 72C Phum,Land/Development,False,residential,This Land area is located the middle of thaila...,https://www.realestate.com.kh/246457/,en


In [18]:
df_all['language'].value_counts()

language
en       5658
km        425
zh-CN     281
ja          5
Name: count, dtype: int64

In [19]:
from deep_translator import GoogleTranslator
for i in range(len(df_all)):
    if df_all.at[i, 'language'] != 'en':
        translated = GoogleTranslator(
            source=df_all.at[i, 'language'], 
            target='en'
        ).translate(df_all.at[i, 'information'])
        df_all.at[i, 'information'] = translated


In [20]:
df_all

Unnamed: 0,id,headline,price_display,rent_display,bedrooms,bathrooms,land_area,thumbnail_url,thumbnail_urls,garages,...,address_subdivision,address_locality,address_line_2,address_line_1,category_name,is_parent,type,information,source_url,language
0,204942,A flat (2 floors) near Hengly market and near ...,"$150,000",,6.0,4.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Meanchey,Stueng Mean chey,,Flat,False,residential,A flat (2 floors) near Hengly market and near ...,https://www.realestate.com.kh/204942/,en
1,211997,Twin Villa (Twin Villa) in Borey Highland 2005...,"$269,000",,4.0,7.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Sen Sok,Khmuonh,,Twin Villa,False,residential,Twin Villa (Twin Villa) in Borey Highland 2005...,https://www.realestate.com.kh/211997/,en
2,212595,house for sale,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Chamkarmon,BKK 2,117 117,House,False,residential,"There are 3 floors 2 bedrooms, 2 bathrooms can...",https://www.realestate.com.kh/212595/,km
3,211987,House for sale in Meanchey Area,"$450,000",,8.0,6.0,0.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,3.0,...,Phnom Penh,Meanchey,Stueng Mean chey 3,1 ផ្លូវលូប្រាំ(82c),Flat,False,residential,"I have a business house, I want to sell a hous...",https://www.realestate.com.kh/211987/,en
4,212653,House for Sale Urgently | Extra Space and Stai...,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Phnom Penh,Meanchey,Boeung Tumpun,"ST. 45BT #4C, ST. 45BT #4C,",Flat,False,residential,Apartment Instast 4M * 15.5m There are 45M kit...,https://www.realestate.com.kh/212653/,km
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,246364,Land in Phnom Tamao | Selling 60% below market...,"$1,440,000",,,,80000.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Takeo,Bati,Kandoeng,,Land,False,commercial,Land in Phnom Tamao | Selling 60% below market...,https://www.realestate.com.kh/246364/,en
6365,217364,ផ្ទះសំណាក់លក់បន្ទាន់,POA,,18.0,18.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,1.0,...,Tboung Khmum,Ponhea Kraek,Kraek,72 St 72,House,False,residential,"Selling negotiable emergency lodges, which are...",https://www.realestate.com.kh/217364/,km
6366,231535,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale,"$100,000/m²",,,,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Tboung Khmum,Ponhea Kraek,Trapeang Phlong,,Land/Development,False,residential,Tucking Farm Sale | Durian Farm for Sale Locat...,https://www.realestate.com.kh/231535/,km
6367,246457,Land For Sale,"$147,000",,,,22317.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Tboung Khmum,Tboung Khmum,Roka Po Pram,72 Pel 72C Phum,Land/Development,False,residential,This Land area is located the middle of thaila...,https://www.realestate.com.kh/246457/,en


In [25]:
# Ensure 'latitude' and 'longitude' columns exist
if 'latitude' not in df_all.columns:
    df_all['latitude'] = None
if 'longitude' not in df_all.columns:
    df_all['longitude'] = None

# Extract latitude and longitude from the 'location' column
# The 'location' column appears to be in the format [longitude, latitude]
for idx, row in df_all.iterrows():
    location_data = row.get('location') # Use .get() to safely access 'location'
    if isinstance(location_data, list) and len(location_data) == 2:
        df_all.at[idx, 'longitude'] = location_data[0]
        df_all.at[idx, 'latitude'] = location_data[1]
    else:
        # Handle cases where location data is missing or not in expected format
        df_all.at[idx, 'longitude'] = None
        df_all.at[idx, 'latitude'] = None

In [26]:
df_all

Unnamed: 0,id,headline,price_display,rent_display,bedrooms,bathrooms,land_area,thumbnail_url,thumbnail_urls,garages,...,address_line_2,address_line_1,category_name,is_parent,type,information,source_url,language,latitude,longitude
0,204942,A flat (2 floors) near Hengly market and near ...,"$150,000",,6.0,4.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Stueng Mean chey,,Flat,False,residential,A flat (2 floors) near Hengly market and near ...,https://www.realestate.com.kh/204942/,en,11.53,104.91
1,211997,Twin Villa (Twin Villa) in Borey Highland 2005...,"$269,000",,4.0,7.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Khmuonh,,Twin Villa,False,residential,Twin Villa (Twin Villa) in Borey Highland 2005...,https://www.realestate.com.kh/211997/,en,11.55,104.89
2,212595,house for sale,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,BKK 2,117 117,House,False,residential,"There are 3 floors 2 bedrooms, 2 bathrooms can...",https://www.realestate.com.kh/212595/,km,11.554793,104.917136
3,211987,House for sale in Meanchey Area,"$450,000",,8.0,6.0,0.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,3.0,...,Stueng Mean chey 3,1 ផ្លូវលូប្រាំ(82c),Flat,False,residential,"I have a business house, I want to sell a hous...",https://www.realestate.com.kh/211987/,en,11.539648,104.886328
4,212653,House for Sale Urgently | Extra Space and Stai...,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Boeung Tumpun,"ST. 45BT #4C, ST. 45BT #4C,",Flat,False,residential,Apartment Instast 4M * 15.5m There are 45M kit...,https://www.realestate.com.kh/212653/,km,11.55,104.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,246364,Land in Phnom Tamao | Selling 60% below market...,"$1,440,000",,,,80000.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Kandoeng,,Land,False,commercial,Land in Phnom Tamao | Selling 60% below market...,https://www.realestate.com.kh/246364/,en,11.297113,104.822738
6365,217364,ផ្ទះសំណាក់លក់បន្ទាន់,POA,,18.0,18.0,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,1.0,...,Kraek,72 St 72,House,False,residential,"Selling negotiable emergency lodges, which are...",https://www.realestate.com.kh/217364/,km,11.761335,105.94432
6366,231535,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale,"$100,000/m²",,,,,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Trapeang Phlong,,Land/Development,False,residential,Tucking Farm Sale | Durian Farm for Sale Locat...,https://www.realestate.com.kh/231535/,km,11.709971,105.956464
6367,246457,Land For Sale,"$147,000",,,,22317.0,https://images.realestate.com.kh/__sized__/lis...,[https://images.realestate.com.kh/__sized__/li...,,...,Roka Po Pram,72 Pel 72C Phum,Land/Development,False,residential,This Land area is located the middle of thaila...,https://www.realestate.com.kh/246457/,en,12.04224,105.680894


In [27]:
df_all.to_csv('../../../data/raw/realestates_kh_v2.csv')

In [2]:
df_all = pd.read_csv('../../../data/raw/realestates_kh_v2.csv')

In [3]:
df_all

Unnamed: 0.1,Unnamed: 0,id,headline,price_display,rent_display,bedrooms,bathrooms,land_area,thumbnail_url,thumbnail_urls,...,address_line_2,address_line_1,category_name,is_parent,type,information,source_url,language,latitude,longitude
0,0,204942,A flat (2 floors) near Hengly market and near ...,"$150,000",,6.0,4.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Stueng Mean chey,,Flat,False,residential,A flat (2 floors) near Hengly market and near ...,https://www.realestate.com.kh/204942/,en,11.530000,104.910000
1,1,211997,Twin Villa (Twin Villa) in Borey Highland 2005...,"$269,000",,4.0,7.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Khmuonh,,Twin Villa,False,residential,Twin Villa (Twin Villa) in Borey Highland 2005...,https://www.realestate.com.kh/211997/,en,11.550000,104.890000
2,2,212595,house for sale,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,BKK 2,117 117,House,False,residential,"There are 3 floors 2 bedrooms, 2 bathrooms can...",https://www.realestate.com.kh/212595/,km,11.554793,104.917136
3,3,211987,House for sale in Meanchey Area,"$450,000",,8.0,6.0,0.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Stueng Mean chey 3,1 ផ្លូវលូប្រាំ(82c),Flat,False,residential,"I have a business house, I want to sell a hous...",https://www.realestate.com.kh/211987/,en,11.539648,104.886328
4,4,212653,House for Sale Urgently | Extra Space and Stai...,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Boeung Tumpun,"ST. 45BT #4C, ST. 45BT #4C,",Flat,False,residential,Apartment Instast 4M * 15.5m There are 45M kit...,https://www.realestate.com.kh/212653/,km,11.550000,104.930000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,6364,246364,Land in Phnom Tamao | Selling 60% below market...,"$1,440,000",,,,80000.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Kandoeng,,Land,False,commercial,Land in Phnom Tamao | Selling 60% below market...,https://www.realestate.com.kh/246364/,en,11.297113,104.822738
6365,6365,217364,ផ្ទះសំណាក់លក់បន្ទាន់,POA,,18.0,18.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Kraek,72 St 72,House,False,residential,"Selling negotiable emergency lodges, which are...",https://www.realestate.com.kh/217364/,km,11.761335,105.944320
6366,6366,231535,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale,"$100,000/m²",,,,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Trapeang Phlong,,Land/Development,False,residential,Tucking Farm Sale | Durian Farm for Sale Locat...,https://www.realestate.com.kh/231535/,km,11.709971,105.956464
6367,6367,246457,Land For Sale,"$147,000",,,,22317.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Roka Po Pram,72 Pel 72C Phum,Land/Development,False,residential,This Land area is located the middle of thaila...,https://www.realestate.com.kh/246457/,en,12.042240,105.680894


In [8]:
base_listing_url = "https://www.realestate.com.kh/"


In [9]:
# Create new columns if they don't already exist in df_all
if 'information' not in df_all.columns:
    df_all['information'] = None
if 'source_url' not in df_all.columns: # New column to store the URL of the scraped page
    df_all['source_url'] = None
if 'land_size' not in df_all.columns: # New column for land size
    df_all['land_size'] = None
if 'house_size' not in df_all.columns: # New column for house size
    df_all['house_size'] = None


# Helper function to extract land size from text
def extract_land_size(description_text):
    """
    Extracts land size from a given text description using regex.
    Looks for patterns like "Land size: 4m x 22.5m" or "Land area: 150 sqm".
    Returns the extracted string or None if not found.
    """
    if not description_text:
        return None

    # Pattern to capture dimensions (e.g., Xm x Ym) or single value with unit (e.g., X sqm)
    # The regex looks for keywords like "Land size", "Land area", "Land" followed by a colon or space,
    # then captures the actual size pattern.
    land_pattern = re.compile(
        r"(?:land\s*size|land\s*area|land)\s*[:\s]*"
        r"((?:\d+(?:\.\d+)?\s*m?\s*[xX]\s*\d+(?:\.\d+)?\s*m?)" # Dimensions like Xm x Ym
        r"|(?:\d+(?:\.\d+)?\s*(?:sqm|m2|m²|ha|hectare(?:s)?|sq\.\s*m|square\s*meter(?:s)?|square\s*metre(?:s)?)?))", # Single value with unit (optional unit if just a number)
        re.IGNORECASE
    )
    match = land_pattern.search(description_text)
    if match:
        return match.group(1).strip() # group(1) captures the actual size part
    return None

# Helper function to extract house size from text
def extract_house_size(description_text):
    """
    Extracts house/building size from a given text description using regex.
    Looks for patterns like "House size: 4m x 16m" or "Building size: 200 sqm".
    Returns the extracted string or None if not found.
    """
    if not description_text:
        return None

    # Pattern to capture dimensions or single value with unit for house/building size.
    # The regex looks for keywords like "House size", "House area", "Building size", "Building area", "House"
    # followed by a colon or space, then captures the actual size pattern.
    house_pattern = re.compile(
        r"(?:house\s*size|house\s*area|building\s*size|building\s*area|house)\s*[:\s]*"
        r"((?:\d+(?:\.\d+)?\s*m?\s*[xX]\s*\d+(?:\.\d+)?\s*m?)" # Dimensions like Xm x Ym
        r"|(?:\d+(?:\.\d+)?\s*(?:sqm|m2|m²|ha|hectare(?:s)?|sq\.\s*m|square\s*meter(?:s)?|square\s*metre(?:s)?)?))", # Single value with unit (optional unit if just a number)
        re.IGNORECASE
    )
    match = house_pattern.search(description_text)
    if match:
        return match.group(1).strip() # group(1) captures the actual size part
    return None


# Define the scraping function
def fetch_info(index, id_value):
    """
    Fetches detailed information, source URL, land size, and house size for a single property listing.

    Args:
        index (int): The index of the row in the DataFrame being processed.
        id_value (str/int): The unique ID of the property listing.

    Returns:
        tuple: (index, raw_text, temp_url, land_size, house_size) if successful, or
               (index, None, temp_url, None, None) if an error occurs or data not found.
    """
    temp_url = base_listing_url + str(id_value) + '/'
    raw_text = None
    land_size = None # Initialize land_size
    house_size = None # Initialize house_size
    try:
        response = requests.get(temp_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        span = soup.find("span", class_="css-zrj3zm")
        if span:
            raw_text = span.get_text(separator=" ", strip=True)
            land_size = extract_land_size(raw_text) # Extract land size
            house_size = extract_house_size(raw_text) # Extract house size
            print(f"[{index}] ✔ Success: {temp_url}")
            return index, raw_text, temp_url, land_size, house_size # Return both sizes
        else:
            print(f"[{index}] ✘ No span found: {temp_url}")
            return index, None, temp_url, None, None # Return None for both sizes
    except requests.exceptions.RequestException as e:
        print(f"[{index}] ⚠ HTTP Request Error for {temp_url}: {e}")
        return index, None, temp_url, None, None # Return None for both sizes on error
    except Exception as e:
        print(f"[{index}] ⚠ Unexpected Error for {temp_url}: {e}")
        return index, None, temp_url, None, None # Return None for both sizes on error

# Use ThreadPoolExecutor for parallel execution
with ThreadPoolExecutor(max_workers=25) as executor:
    futures = [
        executor.submit(fetch_info, i, df_all.iloc[i]['id']) for i in range(len(df_all))
    ]

    for future in as_completed(futures):
        index, result_text, result_url, extracted_land_size, extracted_house_size = future.result() # Unpack both sizes
        df_all.at[index, 'information'] = result_text
        df_all.at[index, 'source_url'] = result_url
        df_all.at[index, 'land_size'] = extracted_land_size # Assign land_size
        df_all.at[index, 'house_size'] = extracted_house_size # Assign house_size


[23] ✔ Success: https://www.realestate.com.kh/228347/
[21] ✔ Success: https://www.realestate.com.kh/235048/
[2] ✔ Success: https://www.realestate.com.kh/212595/
[4] ✔ Success: https://www.realestate.com.kh/212653/
[13] ✔ Success: https://www.realestate.com.kh/145091/
[8] ✔ Success: https://www.realestate.com.kh/218736/
[6] ✔ Success: https://www.realestate.com.kh/218884/
[14] ✔ Success: https://www.realestate.com.kh/153997/
[7] ✔ Success: https://www.realestate.com.kh/218563/
[0] ✔ Success: https://www.realestate.com.kh/204942/
[17] ✔ Success: https://www.realestate.com.kh/232920/
[10] ✔ Success: https://www.realestate.com.kh/229653/
[22] ✔ Success: https://www.realestate.com.kh/235065/
[20] ✔ Success: https://www.realestate.com.kh/231183/
[5] ✔ Success: https://www.realestate.com.kh/217976/
[15] ✔ Success: https://www.realestate.com.kh/156169/
[9] ✔ Success: https://www.realestate.com.kh/127135/
[19] ✔ Success: https://www.realestate.com.kh/230541/
[3] ✔ Success: https://www.realestat

In [10]:
df_all

Unnamed: 0.1,Unnamed: 0,id,headline,price_display,rent_display,bedrooms,bathrooms,land_area,thumbnail_url,thumbnail_urls,...,category_name,is_parent,type,information,source_url,language,latitude,longitude,land_size,house_size
0,0,204942,A flat (2 floors) near Hengly market and near ...,"$150,000",,6.0,4.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Flat,False,residential,A flat (2 floors) near Hengly market and near ...,https://www.realestate.com.kh/204942/,en,11.530000,104.910000,4m x 22.5m,4m x 16m
1,1,211997,Twin Villa (Twin Villa) in Borey Highland 2005...,"$269,000",,4.0,7.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Twin Villa,False,residential,Twin Villa (Twin Villa) in Borey Highland 2005...,https://www.realestate.com.kh/211997/,en,11.550000,104.890000,2005,6m x 12m
2,2,212595,house for sale,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,House,False,residential,មាន3ជាន់ 2បន្ទប់ទឹក បន្ទប់គេង2 អាចដាក់ម៉ូតូបាន...,https://www.realestate.com.kh/212595/,km,11.554793,104.917136,,
3,3,211987,House for sale in Meanchey Area,"$450,000",,8.0,6.0,0.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Flat,False,residential,"I have a business house, I want to sell a hous...",https://www.realestate.com.kh/211987/,en,11.539648,104.886328,,5x18
4,4,212653,House for Sale Urgently | Extra Space and Stai...,"$85,000",,2.0,2.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Flat,False,residential,ផ្ទះល្វែងលក់បន្ទាន់ 4m * 15.5m មានជណ្ដើរកៀន ចង...,https://www.realestate.com.kh/212653/,km,11.550000,104.930000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6364,6364,246364,Land in Phnom Tamao | Selling 60% below market...,"$1,440,000",,,,80000.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Land,False,commercial,Land in Phnom Tamao | Selling 60% below market...,https://www.realestate.com.kh/246364/,en,11.297113,104.822738,8 hectares,
6365,6365,217364,ផ្ទះសំណាក់លក់បន្ទាន់,POA,,18.0,18.0,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,House,False,residential,លក់ផ្ទះសំណាក់បន្ទាន់ តម្លៃសមរម្យដែលអាចចរចាបាន ...,https://www.realestate.com.kh/217364/,km,11.761335,105.944320,,
6366,6366,231535,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale,"$100,000/m²",,,,,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Land/Development,False,residential,ដី ចំការធូរ៉េន លក់ | Durian Farm For Sale ទីតា...,https://www.realestate.com.kh/231535/,km,11.709971,105.956464,50 Hectares,
6367,6367,246457,Land For Sale,"$147,000",,,,22317.0,https://images.realestate.com.kh/__sized__/lis...,['https://images.realestate.com.kh/__sized__/l...,...,Land/Development,False,residential,This Land area is located the middle of thaila...,https://www.realestate.com.kh/246457/,en,12.042240,105.680894,,


In [None]:
# --- Define reverse geocoding function ---
def reverse_geocode(lat, lon):
    """
    Reverse geocodes latitude and longitude using Nominatim API to get administrative divisions.
    Adheres to Nominatim usage policy by including a User-Agent and sleeping after each request.
    """
    url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}&zoom=18&addressdetails=1"
    nominatim_headers = {
        "User-Agent": "RealEstateScraperForCambodia/1.0 (zinhour10@gmail.com)"  # Replace with your real email
    }
    try:
        response = requests.get(url, headers=nominatim_headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        address = data.get('address', {})

        province = address.get('state')
        district = address.get('city_district') or address.get('county') or address.get('suburb')
        commune = address.get('suburb') or address.get('village') or address.get('town') or address.get('neighbourhood')

        if province == "Phnom Penh":
            if 'city_district' in address:
                district = address['city_district']
            if 'suburb' in address:
                commune = address['suburb']
            elif 'neighbourhood' in address:
                commune = address['neighbourhood']

        return province, district, commune

    except requests.exceptions.RequestException as e:
        print(f"Reverse geocoding failed for {lat}, {lon} (HTTP/Network error): {e}")
        return None, None, None
    except Exception as e:
        print(f"An unexpected error during reverse geocoding for {lat}, {lon}: {e}")
        return None, None, None
    finally:
        time.sleep(1.1)  # Rate limit: max 1 request/sec per Nominatim policy


# --- Prepare DataFrame ---
# (You should load or define your df_all here)
# Example placeholder if needed:
# df_all = pd.DataFrame({
#     'location': ['Loc1', 'Loc2'],
#     'latitude': [11.55, 13.37],
#     'longitude': [104.92, 103.85],
#     'address_subdivision': ['Some Address', 'Another Address']
# })

# Add geocode columns
df_all['province_geocoded'] = None
df_all['district_geocoded'] = None
df_all['commune_geocoded'] = None

print("\nStarting reverse geocoding for location data...")

# Filter only rows with valid coordinates
rows_to_geocode = df_all[df_all['latitude'].notna() & df_all['longitude'].notna()]

# Perform geocoding with rate-limited concurrency
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = {
        executor.submit(reverse_geocode, row['latitude'], row['longitude']): idx
        for idx, row in rows_to_geocode.iterrows()
    }

    for i, future in enumerate(as_completed(futures)):
        original_idx = futures[future]
        province, district, commune = future.result()
        df_all.at[original_idx, 'province_geocoded'] = province
        df_all.at[original_idx, 'district_geocoded'] = district
        df_all.at[original_idx, 'commune_geocoded'] = commune

        if (i + 1) % 10 == 0:
            print(f"Geocoded {i + 1} of {len(futures)} locations...")

print("Reverse geocoding complete.")
print("\nDataFrame with geocoded location details (first 5 rows):")
print(df_all[['location', 'latitude', 'longitude', 'address_subdivision',
              'province_geocoded', 'district_geocoded', 'commune_geocoded']].head())


Starting reverse geocoding for location data...
Geocoded 10 of 6369 locations...
Geocoded 20 of 6369 locations...
Geocoded 30 of 6369 locations...
Geocoded 40 of 6369 locations...
Geocoded 50 of 6369 locations...
Geocoded 60 of 6369 locations...
Geocoded 70 of 6369 locations...
Geocoded 80 of 6369 locations...
Geocoded 90 of 6369 locations...
Geocoded 100 of 6369 locations...
Geocoded 110 of 6369 locations...
Geocoded 120 of 6369 locations...
Geocoded 130 of 6369 locations...
Geocoded 140 of 6369 locations...
Geocoded 150 of 6369 locations...
Geocoded 160 of 6369 locations...
Geocoded 170 of 6369 locations...
Geocoded 180 of 6369 locations...
Geocoded 190 of 6369 locations...
Geocoded 200 of 6369 locations...
Geocoded 210 of 6369 locations...
Geocoded 220 of 6369 locations...
Geocoded 230 of 6369 locations...
Geocoded 240 of 6369 locations...
Geocoded 250 of 6369 locations...
Geocoded 260 of 6369 locations...
Geocoded 270 of 6369 locations...
Geocoded 280 of 6369 locations...
Geocoded

: 

: 