In [1]:
# Midland Residential Estates

In [28]:
import pandas as pd
import requests
from requests.exceptions import RequestException
from tqdm import tqdm

def load_data(file_path):
    df = pd.read_excel(file_path)
    df.drop_duplicates(subset=['m_district_code', 'm_district'], inplace=True)
    return df



def fetch_estates_data(district_code, limit=500):
    base_url = "https://data.midland.com.hk/search/v2/estates"
    results = []
    page = 1
    while True:
        params = {
            "ad": "true",
            "lang": "en",
            "currency": "HKD",
            "unit": "feet",
            "search_behavior": "normal",
            "intsmdist_ids": district_code,
            "page": page,
            "limit": limit
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 \
                Satari/537.36 Edg/131.0.0.0', 
            'authorization':'''Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJndWlkIjoibXItMjAyNC0xMi0wNy0tLVlWN0hKU2QxelRzOHpwVDhJNEdjdGxLcjQ1Z0l4cWhsdVp3SEdvZXVSX1o3RkU2cmh1Q1NjVVpqM1E3SXIzZWVQSmZpMy1JSSIsImF1ZCI6Im15cGFnZWFwcC1tbm5rYiIsInN1YiI6Im1yLTIwMjQtMTItMDctLS1ZVjdISlNkMXpUczh6cFQ4STRHY3RsS3I0NWdJeHFobHVad0hHb2V1Ul9aN0ZFNnJodUNTY1VaajNRN0lyM2VlUEpmaTMtSUkiLCJpYXQiOjE3MzM1NDk0MjUsImV4cCI6MTc2ODEwOTQyNSwiaXNzIjoiZGF0YS5taWRsYW5kLmNvbS5oayJ9.LOOVgc_Nw7OPNnAlB8iC1kRHL0W8UVNVa0GaJYaxTxVZtO33ZbkR64rxMHSifvZOzYr38aJENj-SDIbkq4Y75CxqMPegyBUgHtaub-Fez5qaH2W0Dz71pUdYijDG3rB4Dkbdf8k21QsHerJmOFnpryzTVnZDxv-3g8Lmjz2WUhmrqMamKox3w-T9wRJ4p_wzcJwvXWgtvxkapr3Ep0YSJy3fJsV-Nwm_QiJf2JR0V4rOAu7f-YLMSy7IYje3W-HvVqAZV2cDphg_cYnf6CpirJPu_ix2z6BtIMpYMXeSiZyZtKCHiWFNtUm6QTD2adArWtLl_NvbgcH9mhVYuWi8NcrZBdBh4c72bSNRm104oEbRb9-vb1AylH2oFkEz33xXXEAJRtbQxoQ3qZj_yoDIexrinOSlkJB50fSu98Xizv9eZstnbtzkgVjfKpOAWQFdHKennjN9Azq6yTlejDVspL7A0JsY4ZlO4HQNdkNhiOQDYypHgx8jQMm0B0rbaa0cEz1S0s43Lh01eNVBN9Is35jAWFsJIP-iLvHqXJ9d0pGoHe0N7PQk2dmLo9E5szP0U04MZxt4m9TEpJkn-0uS_ZDSVABlBU2KGIkTmuzm1VltsDhPhoNrbJBJVdxJJdublpDnVFk8aO1gFWKNzptw48ipmLfpRosynC_x3Ud6QMU'''
        }
        
        try: 
            reponse = requests.get(base_url, params=params, headers=headers)
            reponse.raise_for_status()
            data = reponse.json()
            
            if 'result' in data and data['result']:
                processed_data = process_nested_data(data['result'])
                results.extend(processed_data)
                page += 1
            else:
                break
            
        except RequestException as e:
            print(f"Stopped fetching data at page {page} for district code {district_code} due to error: {e}")
            break
    
    return results

def process_nested_data(data):
    """
    Flattens nested JSON structures into a flat dictionary using pandas.json_normalize.
    Handles nested fields like 'amenities' and 'market_stat_monthly'.
    """
    flattened_data = []

    for item in data:
        # Parse 'amenities' field (list of dictionaries) into separate columns
        if 'amenities' in item and isinstance(item['amenities'], str):
            try:
                amenities = ast.literal_eval(item['amenities'])
                for idx, amenity in enumerate(amenities, start=1):
                    item[f"amenity_{idx}_type"] = amenity.get('type', None)
                    item[f"amenity_{idx}_name"] = amenity.get('name', None)
                    item[f"amenity_{idx}_walking_minute"] = amenity.get('walking_minute', None)
            except (ValueError, SyntaxError):
                pass  # Leave amenities unprocessed if parsing fails
        item.pop('amenities', None)  # Remove original amenities field

        # Normalize 'market_stat_monthly' field (list of dictionaries) into rows
        if 'market_stat_monthly' in item and isinstance(item['market_stat_monthly'], str):
            try:
                monthly_stats = ast.literal_eval(item['market_stat_monthly'])
                for monthly_stat in monthly_stats:
                    new_record = item.copy()
                    new_record.update({
                        "monthly_date": monthly_stat.get('date'),
                        "monthly_avg_net_ft_price": monthly_stat.get('avg_net_ft_price'),
                    })
                    flattened_data.append(new_record)
            except (ValueError, SyntaxError):
                pass  # Leave market_stat_monthly unprocessed if parsing fails
        else:
            flattened_data.append(item)

    return flattened_data

def process_estate_data(df):
    all_estate_data = []
    for _, row in tqdm(df[:5].iterrows(), total=df.shape[0], desc="Processing districts"):
        district_data = fetch_estates_data(row['m_district_code'], limit=500)
        all_estate_data.extend(district_data)
        
    return all_estate_data

def main():
    df = load_data("midland_res_area_code.xlsx")
    estate_data = process_estate_data(df)
    
    if estate_data:
        estate_df = pd.DataFrame(estate_data)
        
        # Save to CSV with normalized data
        estate_df.to_csv("midland_estates.csv", index=False, encoding='utf-8')
        print("Data saved to midland_estates.csv")
    else:
        print("No data fetched")   
    
if __name__ == "__main__":
    main()

Processing districts:   4%|▍         | 5/130 [00:03<01:28,  1.41it/s]

Data saved to midland_estates.csv



