# In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import cm as mpl_cm
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from branca.colormap import LinearColormap

# =======================
# CONFIGURATION
# =======================
# Weights applied to the scaled EV count and the scaled AADT term when the
# per-postcode need score is computed. The AADT term is currently a 0.0
# placeholder column, so only W1_EV_COUNT influences the score.
W1_EV_COUNT = 1.0
W2_AADT = 0.0
# Value of the (whitespace-stripped) 'motive_power' column that marks a
# registration row as an electric vehicle.
EV_POWER = 'Battery/Fuel-cell electric'
# Sentinel EV-to-plug ratio assigned to postcodes that have EVs but no plugs,
# used in place of a division by zero.
ZERO_PLUG_PROXY_RATIO = 99999

# Inclusive (start, end) postcode ranges treated as NSW when filtering
# registrations.
# NOTE(review): the gaps between ranges presumably exclude ACT postcodes - confirm.
NSW_POSTCODE_RANGES = [
    (1000, 2599),
    (2620, 2899),
    (2921, 2999)
]

# Input CSV paths, resolved relative to the current working directory.
CHARGER_FILE = 'ev_chargers_consolidated_sep25.csv'
REGISTRATION_FILE = 'vehicles registration.csv'

# Output artefact paths; all live under the 'output' directory created below.
OUTPUT_CSV = 'output/nsw_integrated_demand_supply_metrics.csv'
OUTPUT_SUMMARY_MD = 'output/full_analysis_summary.md'
OUTPUT_MAP_HTML = 'output/nsw_ev_heatmap.html'

# Column on which the demand and capacity tables are merged.
ANALYSIS_JOIN_COLUMN = 'pcode'
# Side effect at import/run time: make sure the output directory exists.
os.makedirs('output', exist_ok=True)


# =======================
# PLOTTING UTIL
# =======================
def create_bar_plot(df, x_col, y_col, title, x_label, y_label, color, output_file, is_ratio=False, ascending=False):
    """Save a bar chart of the (up to) ten leading rows of ``df`` by ``y_col``.

    Args:
        df: DataFrame containing ``x_col`` and ``y_col``.
        x_col: Column used for the bar labels (converted to ``str``).
        y_col: Numeric column that is ranked and plotted.
        title: Figure title.
        x_label: X-axis label.
        y_label: Y-axis label.
        color: Matplotlib colour for the bars.
        output_file: Path the figure is written to (300 dpi).
        is_ratio: When true, values are labelled with one decimal place and
            rows carrying the ``ZERO_PLUG_PROXY_RATIO`` sentinel are excluded.
            When false, only strictly positive values are plotted.
        ascending: Sort direction used to pick the ten rows.
    """
    # Filter BEFORE ranking. Filtering after ``head(10)`` (as was done
    # previously) could leave fewer than ten bars even when enough eligible
    # rows exist, because ineligible rows had already used up ranking slots.
    if is_ratio:
        # Strict '<' so the sentinel itself is dropped; it is a placeholder
        # for "no plugs", not a real ratio, and must never be drawn.
        eligible = df[df[y_col] < ZERO_PLUG_PROXY_RATIO]
    else:
        eligible = df[df[y_col] > 0]

    top = eligible.sort_values(by=y_col, ascending=ascending).head(10).copy()
    # String labels make matplotlib treat the x axis as categorical.
    top[x_col] = top[x_col].astype(str)

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(top[x_col], top[y_col], color=color)

        for bar in bars:
            yval = bar.get_height()
            if y_col == 'ev_proportion':
                label = f'{yval:.1%}'
            elif is_ratio:
                label = f'{yval:.1f}'
            else:
                label = f'{int(yval):,}'
            # Place the value just above the top of its bar.
            ax.text(bar.get_x() + bar.get_width() / 2, yval * 1.01, label,
                    ha='center', va='bottom', fontsize=9)

        ax.set_title(title, fontsize=16)
        ax.set_xlabel(x_label, fontsize=12)
        ax.set_ylabel(y_label, fontsize=12)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        ax.grid(axis='y', linestyle='--', alpha=0.6)
        fig.tight_layout()
        fig.savefig(output_file, dpi=300)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)


# =======================
# DATA PROCESSING
# =======================
def process_ev_registration_data(file_path):
    """Load vehicle registrations and summarise EV demand per NSW postcode.

    Args:
        file_path: Path to the registration CSV. After header normalisation it
            must provide 'registered_postcode' (or 'pcode'), 'no_vehicles' and
            'motive_power' columns.

    Returns:
        DataFrame with one row per postcode and the columns 'pcode',
        'total_vehicles', 'ev_count' and 'ev_proportion'.
    """
    print("1. Processing EV Registration Data...")

    registrations = pd.read_csv(file_path)
    # Normalise headers so lookups are case- and whitespace-insensitive.
    registrations.columns = registrations.columns.str.strip().str.lower()
    registrations = registrations.rename(columns={'registered_postcode': 'pcode'})

    # Keep only rows whose vehicle count parses as a number.
    registrations['no_vehicles'] = pd.to_numeric(registrations['no_vehicles'], errors='coerce')
    registrations = registrations[registrations['no_vehicles'].notna()].copy()

    # Keep only rows that contain a run of digits in the postcode field.
    registrations['pcode'] = registrations['pcode'].astype(str).str.extract(r'(\d+)', expand=False)
    registrations = registrations[registrations['pcode'].notna()].copy()
    registrations['pcode'] = registrations['pcode'].astype(str)
    registrations['pcode_int'] = registrations['pcode'].astype(int)

    # A row is in NSW when its postcode falls inside any configured range
    # (both bounds inclusive).
    in_nsw = pd.Series(False, index=registrations.index)
    for range_start, range_end in NSW_POSTCODE_RANGES:
        in_nsw = in_nsw | registrations['pcode_int'].between(range_start, range_end)
    nsw_rows = registrations[in_nsw].copy()

    is_ev = nsw_rows['motive_power'].str.strip() == EV_POWER
    ev_per_postcode = (
        nsw_rows[is_ev]
        .groupby('pcode')['no_vehicles']
        .sum()
        .rename('ev_count')
        .reset_index()
    )
    all_per_postcode = (
        nsw_rows
        .groupby('pcode')['no_vehicles']
        .sum()
        .rename('total_vehicles')
        .reset_index()
    )

    # Left join keeps postcodes that have vehicles but no EVs (count -> 0).
    demand = all_per_postcode.merge(ev_per_postcode, on='pcode', how='left')
    demand['ev_count'] = demand['ev_count'].fillna(0).astype(int)
    demand['ev_proportion'] = np.where(
        demand['total_vehicles'] == 0,
        0,
        demand['ev_count'] / demand['total_vehicles'],
    )

    print(f"  → Computed EV demand for {len(demand):,} NSW postcodes.")
    return demand


def process_charger_data(file_path):
    """Load the charger inventory and aggregate active capacity per postcode.

    Rows whose operator contains 'Upcoming' (case-insensitive) are treated as
    not yet active and ignored, as are rows missing a postcode, coordinates or
    a plug count.

    Args:
        file_path: Path to the charger CSV. After header normalisation it must
            provide 'pcode', 'operator', 'latitude', 'longitude',
            'number_of_plugs' and 'number_of_stations' columns.

    Returns:
        A ``(capacity, stations)`` tuple. ``capacity`` has one row per
        postcode with 'total_plugs', 'total_stations', 'avg_latitude' and
        'avg_longitude'. ``stations`` has one row per active charger with
        'latitude', 'longitude', 'operator', 'num_plugs' and 'pcode'.
    """
    print("2. Processing Charger Data...")
    df = pd.read_csv(file_path)
    # Normalise headers so lookups are case- and whitespace-insensitive.
    df.columns = df.columns.str.strip().str.lower()

    df = df.rename(columns={'number_of_plugs': 'num_plugs', 'number_of_stations': 'num_stations'})
    # Keep only the first run of digits from the postcode field.
    df['pcode'] = df['pcode'].astype(str).str.extract(r'(\d+)', expand=False)

    is_upcoming = df['operator'].str.contains('Upcoming', na=False, case=False)
    # Take an explicit copy: the frame is modified below, and mutating a
    # filtered slice of ``df`` triggers pandas' SettingWithCopyWarning.
    df_active = df[~is_upcoming].copy()
    df_active = df_active.dropna(subset=['pcode', 'latitude', 'longitude', 'num_plugs'])

    # Non-numeric plug counts become 0; missing/non-numeric station counts
    # default to a single station.
    df_active['num_plugs'] = pd.to_numeric(df_active['num_plugs'], errors='coerce').fillna(0).astype(int)
    df_active['num_stations'] = pd.to_numeric(df_active['num_stations'], errors='coerce').fillna(1).astype(int)

    df_capacity = df_active.groupby('pcode').agg(
        total_plugs=('num_plugs', 'sum'),
        total_stations=('num_stations', 'sum'),
        avg_latitude=('latitude', 'mean'),
        avg_longitude=('longitude', 'mean')
    ).reset_index()

    df_active_stations = df_active[['latitude', 'longitude', 'operator', 'num_plugs', 'pcode']]

    print(f"  → Summarized charging infrastructure for {len(df_capacity):,} postcodes.")
    return df_capacity, df_active_stations


def calculate_metrics(df_demand, df_capacity):
    """Join demand with charging capacity and derive per-postcode metrics.

    Args:
        df_demand: Per-postcode demand table with 'ev_count', 'total_vehicles'
            and 'ev_proportion', keyed by ``ANALYSIS_JOIN_COLUMN``.
        df_capacity: Per-postcode capacity table with 'total_plugs',
            'total_stations', 'avg_latitude' and 'avg_longitude', keyed by
            ``ANALYSIS_JOIN_COLUMN``.

    Returns:
        A new DataFrame with one row per demand postcode, plus the columns
        'ev_to_charger_ratio', 'scaled_ev_count', 'scaled_aadt' and
        'need_score'.
    """
    print("3. Combining datasets and calculating metrics...")

    # Left join: every demand postcode is kept, with or without chargers.
    df = df_demand.merge(df_capacity, on=ANALYSIS_JOIN_COLUMN, how='left')

    df[['total_plugs', 'total_stations']] = df[['total_plugs', 'total_stations']].fillna(0).astype(int)
    df[['ev_count', 'total_vehicles']] = df[['ev_count', 'total_vehicles']].fillna(0).astype(int)

    df['ev_proportion'] = df['ev_proportion'].fillna(0.0)
    # Postcodes without any charger have no coordinates of their own; they are
    # assigned the mean of the known charger centroids.
    # NOTE(review): this places every charger-less postcode at one shared
    # point on the map - confirm that is the intended fallback.
    df['avg_latitude'] = df['avg_latitude'].fillna(df['avg_latitude'].mean())
    df['avg_longitude'] = df['avg_longitude'].fillna(df['avg_longitude'].mean())

    # EVs per plug; postcodes with EVs but no plugs get the sentinel ratio,
    # postcodes with neither get 0.
    df['ev_to_charger_ratio'] = np.where(
        df['total_plugs'] == 0,
        np.where(df['ev_count'] > 0, ZERO_PLUG_PROXY_RATIO, 0),
        df['ev_count'] / df['total_plugs']
    )

    # Scale EV counts to [0, 1]. Guard the case where there are no EVs at all
    # (or no rows), which would otherwise produce NaN scores from 0/0.
    max_ev_count = df['ev_count'].max()
    if max_ev_count > 0:
        df['scaled_ev_count'] = df['ev_count'] / max_ev_count
    else:
        df['scaled_ev_count'] = 0.0
    df['scaled_aadt'] = 0.0  # Placeholder for future use

    df['need_score'] = (W1_EV_COUNT * df['scaled_ev_count']) + (W2_AADT * df['scaled_aadt'])

    print(f"  → Metrics computed for {len(df):,} suburbs.")
    return df


# =======================
# OUTPUT + MAP
# =======================
def generate_outputs(df):
    """Write the metrics CSV, the four ranking charts and the markdown summary.

    Args:
        df: Per-postcode metrics table as produced by ``calculate_metrics``.
    """
    print("4. Generating summary output files...")

    # 1) Flat export of the integrated metrics.
    export_columns = [
        'pcode', 'ev_count', 'total_vehicles', 'ev_proportion',
        'total_plugs', 'total_stations', 'avg_latitude', 'avg_longitude',
        'ev_to_charger_ratio', 'need_score'
    ]
    export_frame = df[export_columns]
    export_frame.to_csv(OUTPUT_CSV, index=False)
    print(f"  → Data saved to {OUTPUT_CSV}")

    # 2) Ranking charts.
    create_bar_plot(
        df, 'pcode', 'ev_count',
        'Top 10 NSW Postcodes by EV Count', 'Postcode', 'EV Count',
        '#0077B6', 'output/top_ev_count.png',
    )

    # Proportions are only ranked for postcodes with at least 50 vehicles.
    enough_vehicles = df[df['total_vehicles'] >= 50]
    create_bar_plot(
        enough_vehicles, 'pcode', 'ev_proportion',
        'Top 10 NSW Postcodes by EV Proportion', 'Postcode', 'EV Share',
        '#00B4D8', 'output/top_ev_proportion.png',
    )

    create_bar_plot(
        df, 'pcode', 'total_plugs',
        'Top 10 by Total EV Plug Capacity', 'Postcode', 'Plugs',
        '#48CAE4', 'output/top_capacity.png',
    )

    # Underserved ranking: real ratios only (no sentinel), at least 10 EVs.
    has_real_ratio = df['ev_to_charger_ratio'] < ZERO_PLUG_PROXY_RATIO
    has_enough_evs = df['ev_count'] >= 10
    create_bar_plot(
        df[has_real_ratio & has_enough_evs], 'pcode', 'ev_to_charger_ratio',
        'Top 10 Most Underserved', 'Postcode', 'EV/Plug Ratio',
        '#F77F00', 'output/top_underserved_ratio.png',
        is_ratio=True,
    )

    # 3) Short markdown summary with state-wide totals.
    total_evs = df['ev_count'].sum()
    total_plugs = df['total_plugs'].sum()
    summary_text = f"# NSW EV Demand & Supply Summary\n\nTotal EVs: {total_evs:,}\nTotal Plugs: {total_plugs:,}\n"
    with open(OUTPUT_SUMMARY_MD, 'w') as summary_file:
        summary_file.write(summary_text)

    print(f"  → Summary saved to {OUTPUT_SUMMARY_MD}")


def generate_folium_map(df, stations):
    """Build the interactive folium map and save it to ``OUTPUT_MAP_HTML``.

    One feature group of circle markers is added per metric (EV count, plug
    count, EV-to-charger ratio), each with its own colour scale, plus a
    clustered marker layer of the individual charging stations.

    Args:
        df: Per-postcode metrics table with 'pcode', 'ev_count',
            'total_plugs', 'ev_to_charger_ratio', 'avg_latitude' and
            'avg_longitude'.
        stations: Per-charger table with 'latitude', 'longitude', 'operator'
            and 'num_plugs'.
    """
    print("5. Generating interactive map...")

    lat_center = df['avg_latitude'].mean()
    lon_center = df['avg_longitude'].mean()

    m = folium.Map(location=[lat_center, lon_center], zoom_start=7, tiles='CartoDB dark_matter')

    layers = {
        'EV Count': 'ev_count',
        'EV Plugs': 'total_plugs',
        'EV-to-Charger Ratio': 'ev_to_charger_ratio'
    }
    # Matplotlib palette per metric column; anything else uses 'YlGnBu'.
    palettes = {'ev_count': 'YlOrRd', 'total_plugs': 'YlGn'}

    for layer_name, col in layers.items():
        fg = folium.FeatureGroup(name=layer_name)

        # The sentinel only has meaning in the ratio column. Applying it to
        # the count columns would wrongly zero a genuine value of the same
        # magnitude.
        uses_sentinel = col == 'ev_to_charger_ratio'

        vals = df[col]
        if uses_sentinel:
            vals = vals.replace(ZERO_PLUG_PROXY_RATIO, np.nan)
        vals = vals.dropna()
        vmin, vmax = vals.min(), vals.max()
        # Start the scale 5% above the minimum when there is a value range.
        vmin_display = vmin + (vmax - vmin) * 0.05 if vmax > vmin else vmin

        cm_name = palettes.get(col, 'YlGnBu')
        # pyplot.get_cmap replaces the deprecated matplotlib.cm.get_cmap.
        palette = plt.get_cmap(cm_name)
        # Skip the palest 20% of the palette when sampling colours.
        colormap = LinearColormap(
            colors=[matplotlib.colors.rgb2hex(c) for c in palette(np.linspace(0.2, 1, 256))],
            vmin=vmin_display, vmax=vmax, caption=layer_name
        )

        for _, r in df.iterrows():
            # NOTE(review): postcodes with EVs but no plugs are coloured as
            # ratio 0, i.e. like the best-served postcodes - confirm this is
            # intended rather than the top of the scale.
            is_sentinel = uses_sentinel and r[col] == ZERO_PLUG_PROXY_RATIO
            value = 0 if is_sentinel else r[col]
            color = colormap(value)

            popup = folium.Popup(
                f"<b>Postcode:</b> {r['pcode']}<br>"
                f"<b>EV Count:</b> {r['ev_count']}<br>"
                f"<b>Plugs:</b> {r['total_plugs']}<br>"
                f"<b>Ratio:</b> {r['ev_to_charger_ratio']:.1f}", max_width=300)

            folium.CircleMarker(
                location=[r['avg_latitude'], r['avg_longitude']],
                radius=7,
                color=color,
                fill=True,
                fill_color=color,
                fill_opacity=0.7,
                popup=popup
            ).add_to(fg)

        colormap.add_to(m)
        fg.add_to(m)

    # Individual stations, clustered so dense areas stay readable.
    mc = MarkerCluster(name='EV Charging Stations').add_to(m)
    for _, r in stations.iterrows():
        folium.Marker(
            location=[r['latitude'], r['longitude']],
            popup=f"{r['operator']} ({r['num_plugs']} plugs)",
            icon=folium.Icon(color='blue', icon='bolt', prefix='fa')
        ).add_to(mc)

    folium.LayerControl().add_to(m)
    m.save(OUTPUT_MAP_HTML)
    print(f"  → Map saved to {OUTPUT_MAP_HTML}")


# =======================
# RUN PIPELINE
# =======================
def run_pipeline():
    """Run the full analysis: load both inputs, compute metrics, write outputs."""
    print("--- Starting EV Demand & Supply Analysis ---")

    # Inputs
    ev_demand = process_ev_registration_data(REGISTRATION_FILE)
    charger_capacity, active_stations = process_charger_data(CHARGER_FILE)

    # Combined per-postcode metrics
    metrics = calculate_metrics(ev_demand, charger_capacity)

    # Artefacts: CSV / charts / summary first, then the interactive map
    generate_outputs(metrics)
    generate_folium_map(metrics, active_stations)

# Run the pipeline only when executed as a script (or notebook cell), not
# when the module is imported.
if __name__ == '__main__':
    run_pipeline()

--- Starting EV Demand & Supply Analysis ---
1. Processing EV Registration Data...
  → Computed EV demand for 656 NSW postcodes.
2. Processing Charger Data...
  → Summarized charging infrastructure for 373 postcodes.
3. Combining datasets and calculating metrics...
  → Metrics computed for 656 suburbs.
4. Generating summary output files...
  → Data saved to output/nsw_integrated_demand_supply_metrics.csv
  → Summary saved to output/full_analysis_summary.md
5. Generating interactive map...


  colors=[matplotlib.colors.rgb2hex(c) for c in mpl_cm.get_cmap(cm_name)(np.linspace(0.2, 1, 256))],


  → Map saved to output/nsw_ev_heatmap.html
