# In [3]:
import ast
import json
import re
from collections.abc import MutableMapping

import pandas as pd

def flatten_json(nested_json, parent_key='', sep='_'):
    """Flatten a nested dict/list structure into a single-level dict.

    Keys of nested dicts and positions of list elements are joined onto the
    parent key with ``sep``; for example ``{'a': {'b': 1}, 'c': [2, 3]}``
    becomes ``{'a_b': 1, 'c_0': 2, 'c_1': 3}``.

    Args:
        nested_json: The dict or list to flatten. Any other value yields an
            empty dict.
        parent_key: Prefix for every generated key. An empty prefix adds no
            leading separator.
        sep: Separator placed between path components.

    Returns:
        A dict mapping each joined path to its leaf value. Empty dicts and
        lists contribute no keys; if two paths join to the same key, the one
        visited last wins.
    """
    if isinstance(nested_json, dict):
        children = nested_json.items()
    elif isinstance(nested_json, list):
        # List positions become string path components, just like dict keys.
        children = ((str(i), element) for i, element in enumerate(nested_json))
    else:
        return {}

    flat = {}
    for key, value in children:
        child_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, (dict, list)):
            flat.update(flatten_json(value, child_key, sep=sep))
        else:
            # Scalar leaves are kept whether they sit in a dict or in a list.
            flat[child_key] = value
    return flat

def _parse_json_like(value):
    """Return ``value`` parsed into a Python object when it is a parseable string.

    Python-literal syntax is tried first, then JSON (which covers ``null``,
    ``true`` and ``false``). Non-strings and unparseable strings are returned
    unchanged instead of raising.
    """
    if not isinstance(value, str):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass
    try:
        return json.loads(value)
    except (ValueError, RecursionError):
        return value


def normalize_complex_columns(df, json_columns):
    """Expand JSON-like columns into flat, prefixed columns.

    Each listed column is parsed (string cells are read as Python literals or
    JSON), every dict/list cell is flattened with ``flatten_json``, and the
    resulting keys become new columns named ``'<col>_<flattened key>'``. The
    source column is dropped. Cells that are not (or do not parse to) a dict
    or list contribute no values.

    Args:
        df: Source DataFrame. It is not modified.
        json_columns: Names of the columns to expand.

    Returns:
        A new DataFrame with the listed columns replaced by their expanded
        columns, keeping the original row index.

    Raises:
        KeyError: If a name in ``json_columns`` is not a column of ``df``.
    """
    for col in json_columns:
        parsed = df[col].apply(_parse_json_like)

        expanded_data = [
            flatten_json(entry) if isinstance(entry, (dict, list)) else {}
            for entry in parsed
        ]

        # Reuse df's index so the axis=1 concat lines rows up positionally even
        # when df does not carry a default RangeIndex.
        expanded_df = pd.DataFrame(expanded_data, index=df.index).add_prefix(f'{col}_')

        df = pd.concat([df.drop(col, axis=1), expanded_df], axis=1)

    return df

# Sample usage for Midland data: expand the JSON-like columns of the estates
# CSV into flat columns, then fold list-of-record columns back into one
# dict-valued column per list.
midland_estates = pd.read_csv('midland_estates.csv')
json_columns = ['sm_district', 'region', 'subregion', 'district', 'combined_district',
                'int_district', 'int_sm_district', 'location', 'developer',
                'property_stat', 'market_stat', 'index_component_estate',
                'parent_estate']

midland_normalized = normalize_complex_columns(midland_estates.copy(), json_columns)

# For list-like structures (e.g., [{'id':..., 'name':...}]) the flattened
# columns are named '<base>_<index>_<field>'. Group them by <base> once, so each
# group is collapsed exactly once and every index (not only 0) maps to the
# same base name.
list_column_pattern = re.compile(r'^(.*?)_\d+_')
columns_by_base = {}
for col in midland_normalized.columns:
    match = list_column_pattern.match(str(col))
    if match:
        columns_by_base.setdefault(match.group(1), []).append(col)

for base_name, member_columns in columns_by_base.items():
    # One dict per row holding the non-missing members; None when all are missing.
    collapsed = midland_normalized[member_columns].apply(
        lambda row: row.dropna().to_dict() if row.notna().any() else None, axis=1
    )
    midland_normalized = midland_normalized.drop(columns=member_columns)
    midland_normalized[base_name] = collapsed

# Save normalized data
midland_normalized.to_csv('midland_estates_lv_2.csv', index=False)
