In [2]:
import time

import chromedriver_autoinstaller
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
from tqdm import tqdm

def initialize_driver():
    """
    Create a headless Chrome WebDriver.

    Runs ``chromedriver_autoinstaller.install()`` first so a ChromeDriver
    binary is available, then starts Chrome with the headless, no-sandbox
    and disable-dev-shm-usage flags.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    chromedriver_autoinstaller.install()

    chrome_options = webdriver.ChromeOptions()
    launch_flags = (
        "--headless",  # Enable headless mode
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )
    for flag in launch_flags:
        chrome_options.add_argument(flag)

    browser = webdriver.Chrome(options=chrome_options)
    return browser

# Scrape the Midland ICI building list page into a CSV file.
#
# Flow: start a headless Chrome session, load the list page, read the name,
# address and detail link from every "div.building" card, shut the browser
# down, then write the collected rows to `midlandici_building_list.csv`.


def _text_or_none(parent, css_selector):
    """Return the stripped text of the first match under *parent*, or None if absent."""
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except NoSuchElementException:
        # Only a genuinely missing child maps to None; any other WebDriver
        # failure propagates to the per-card handler so it gets reported.
        return None


# Prepare a list to hold data for each building
buildings_data = []

# Initialize Selenium driver
driver = initialize_driver()

# Everything that talks to the browser sits inside try/finally so the Chrome
# process is always shut down, even when navigation or a lookup raises.
try:
    # Navigate to the building list page
    url = "https://www.midlandici.com.hk/ics/property/building/list?lang=english"
    driver.get(url)
    time.sleep(3)  # Fixed pause to let the page finish loading

    # Find all building card elements.
    # The card element has a CSS class "building" (e.g. "div.building")
    building_elements = driver.find_elements(By.CSS_SELECTOR, "div.building")
    print(f"Found {len(building_elements)} building elements.")

    # Iterate over each building card using tqdm for progress tracking
    for element in tqdm(building_elements, desc="Scraping buildings"):
        try:
            # The building details are contained within the <a> tag
            a_element = element.find_element(By.TAG_NAME, "a")

            buildings_data.append({
                "Building Name": _text_or_none(a_element, "div.building-name"),
                "Address": _text_or_none(a_element, "div.address"),
                "Detail URL": a_element.get_attribute("href"),
            })
        except WebDriverException as e:
            # Best effort per card: report the failure and move on to the next one.
            print(f"Error processing element: {e}")
finally:
    driver.quit()

# Convert the list into a DataFrame and save to CSV
df_buildings = pd.DataFrame(buildings_data)
output_file = "midlandici_building_list.csv"
df_buildings.to_csv(output_file, index=False)
print(f"Scraped building data saved to: {output_file}")


Found 963 building elements.


Scraping buildings: 100%|██████████| 963/963 [00:23<00:00, 40.83it/s]


Scraped building data saved to: midlandici_building_list.csv
