In [None]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text

In [None]:
# Read the five input tables; every file lives under the same relative data directory.
_data_dir = '../../../data/'
county_ba = pd.read_csv(_data_dir + 'county_to_ba.csv')
ba_to_state = pd.read_csv(_data_dir + 'ba_to_state.csv')
aeo_2023 = pd.read_csv(_data_dir + 'demand_AEO_2023_reference.csv')
county_map = pd.read_csv(_data_dir + 'dgen_county_fips_mapping.csv')
load_growth = pd.read_csv(_data_dir + 'load_growth_to_model.csv')

In [None]:
# Relabel balancing areas 'p119' and 'p122' as 'p115'; every other value is kept as-is.
# NOTE(review): the original comment described this as an adjustment "for one PA county" -
# confirm that both codes are meant to be collapsed.
remapped_bas = ['p119', 'p122']
county_ba['ba'] = np.where(county_ba['ba'].isin(remapped_bas), 'p115', county_ba['ba'])

In [None]:
# Distinct (ba, state) pairs from the balancing-area-to-state mapping.
# The frame is already restricted to these two columns, so a plain drop_duplicates() suffices.
ba_state_pairs = ba_to_state[['ba', 'state']]
ba_to_state_unique = ba_state_pairs.drop_duplicates()

In [None]:
# Re-base AEO 2023 data to 2026

# Get the 2026 multiplier per state
base_multipliers = (
    aeo_2023.loc[aeo_2023['year'] == 2026, ['state', 'multiplier']]
    .rename(columns={'multiplier': 'base_multiplier'})
)

# Merge with the original DataFrame.
# * Any base_multiplier column left over from a previous run of this cell is dropped first;
#   otherwise the merge would emit base_multiplier_x / base_multiplier_y and the division
#   below would fail with a KeyError when the cell is re-run.
# * validate='many_to_one' makes pandas raise a MergeError if a state has more than one
#   2026 row, instead of silently duplicating every row for that state.
aeo_2023 = (
    aeo_2023
    .drop(columns='base_multiplier', errors='ignore')
    .merge(base_multipliers, on='state', how='left', validate='many_to_one')
)

# Rebase the multiplier (the 2026 value of each state becomes 1.0).
# States without a 2026 row get a NaN base and therefore NaN multipliers.
aeo_2023['multiplier'] = aeo_2023['multiplier'] / aeo_2023['base_multiplier']

# Only years after 2025
aeo_2023 = aeo_2023[aeo_2023['year'] > 2025]

# Join load growth data with ba_to_state_unique to get ba associated with load growth multipliers
aeo_2023_ba = aeo_2023.merge(ba_to_state_unique, on='state', how='inner')

# Add sector and scenario labels (constant for every row)
aeo_2023_ba['sector_abbr'] = 'res'
aeo_2023_ba['load_growth_scenario_2023'] = 'AEO2023 Reference'


In [None]:
# Attach the county_ba rows to the BA-level AEO multipliers via the shared 'ba' key
# (inner join: only balancing areas present in both frames are kept).
aeo_2023_county = pd.merge(aeo_2023_ba, county_ba, how='inner', on='ba')

In [None]:
# Keep the post-2025 rows and export columns of load_growth, then left-join the
# county-level AEO 2023 multipliers on county, year and sector.
kept_columns = [
    'year', 'sector_abbr', 'county_id', 'nerc_region_desc',
    'nerc_region_abbr', 'load_multiplier', 'load_growth_scenario',
]
join_keys = ['county_id', 'year', 'sector_abbr']
aeo_columns = join_keys + ['multiplier', 'load_growth_scenario_2023']
aeo_columns = ['county_id', 'year', 'sector_abbr', 'multiplier', 'load_growth_scenario_2023']

after_2025 = load_growth['year'] > 2025
load_growth = load_growth.loc[after_2025, kept_columns].merge(
    aeo_2023_county[aeo_columns],
    on=join_keys,
    how='left',
)

In [None]:
# For residential rows, take the multiplier and scenario label from the AEO 2023 join;
# all other sectors keep their existing values.
# NOTE(review): the preceding join is a left join, so a residential row without a match
# receives NaN for both fields here - confirm that is intended.
is_residential = load_growth['sector_abbr'] == 'res'

load_growth['load_multiplier'] = np.where(
    is_residential,
    load_growth['multiplier'],
    load_growth['load_multiplier'],
)
load_growth['load_growth_scenario'] = np.where(
    is_residential,
    load_growth['load_growth_scenario_2023'],
    load_growth['load_growth_scenario'],
)

# Make sure the multiplier column is stored as float
load_growth['load_multiplier'] = load_growth['load_multiplier'].astype(float)

# Keep only the export columns, in export order
export_columns = [
    'year', 'sector_abbr', 'county_id', 'nerc_region_desc',
    'nerc_region_abbr', 'load_growth_scenario', 'load_multiplier',
]
load_growth_adjusted = load_growth[export_columns]

In [None]:
# Persist the adjusted load growth table as CSV (row index omitted)
adjusted_csv_path = '../../../data/load_growth_to_model_adjusted.csv'
load_growth_adjusted.to_csv(adjusted_csv_path, index=False)

In [None]:
# Replace table locally

# Connection settings for the local database. Each value can be overridden with an
# environment variable so credentials do not have to live in the notebook; the defaults
# are the values that were previously hard-coded here.
DB_USER = os.environ.get("DGEN_LOCAL_DB_USER", "postgres")
DB_PASS = os.environ.get("DGEN_LOCAL_DB_PASS", "postgres")
DB_NAME = os.environ.get("DGEN_LOCAL_DB_NAME", "dgen_db")
DB_PORT = int(os.environ.get("DGEN_LOCAL_DB_PORT", "5432"))
DB_HOST = os.environ.get("DGEN_LOCAL_DB_HOST", "localhost")  # or 127.0.0.1 if using proxy

# User and password are percent-encoded so characters such as '@', ':' or '/' cannot
# corrupt the connection URL. With the default values the URL is unchanged.
engine = create_engine(
    f"postgresql+psycopg2://{quote(DB_USER, safe='')}:{quote(DB_PASS, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Schema-qualified names of the base table and of the view that selects from it
TBL_SCHEMA   = "diffusion_shared"
VIEW_SCHEMA  = "diffusion_template"
TABLE_NAME   = "load_growth_to_model_adjusted"
VIEW_NAME    = "load_growth_to_model"

def _type_sql(r):
    dt = r["data_type"]
    if dt == "character varying":
        l = r["character_maximum_length"]
        return f"varchar({l})" if l else "varchar"
    if dt == "character":
        l = r["character_maximum_length"]
        return f"char({l})" if l else "char"
    if dt == "numeric":
        p, s = r["numeric_precision"], r["numeric_scale"]
        return f"numeric({p},{s})" if p and s is not None else "numeric"
    if dt in ("double precision", "integer", "bigint", "real", "boolean", "text"):
        return dt
    return r["udt_name"]

# Refresh the base table and (re)create the view in a single transaction.
# The engine is disposed in `finally` so its connections are released even if a step fails.
try:
    with engine.begin() as con:
        # Ensure schemas
        con.exec_driver_sql(f"CREATE SCHEMA IF NOT EXISTS {TBL_SCHEMA};")
        con.exec_driver_sql(f"CREATE SCHEMA IF NOT EXISTS {VIEW_SCHEMA};")

        # Ensure table exists in diffusion_shared, then refresh contents.
        # The empty bootstrap frame has to come from load_growth_adjusted: load_growth
        # still carries the helper columns 'multiplier' and 'load_growth_scenario_2023',
        # which would otherwise become permanent columns of a newly created table.
        load_growth_adjusted.head(0).to_sql(
            TABLE_NAME, con=con, schema=TBL_SCHEMA, if_exists="append", index=False
        )
        con.exec_driver_sql(f"TRUNCATE {TBL_SCHEMA}.{TABLE_NAME};")
        load_growth_adjusted.to_sql(
            TABLE_NAME, con=con, schema=TBL_SCHEMA, if_exists="append", index=False
        )

        # If the view already exists, preserve its column types; else create a simple one
        rows = con.execute(
            text("""
                SELECT column_name, data_type, character_maximum_length,
                       numeric_precision, numeric_scale, udt_name
                FROM information_schema.columns
                WHERE table_schema = :schema AND table_name = :view
                ORDER BY ordinal_position
            """),
            {"schema": VIEW_SCHEMA, "view": VIEW_NAME},
        ).mappings().all()

        if rows:
            # Column names are double-quoted so identifiers that need quoting
            # (mixed case, reserved words) are referenced correctly.
            cast_selects = ", ".join(
                f'CAST("{r["column_name"]}" AS {_type_sql(r)}) AS "{r["column_name"]}"'
                for r in rows
            )
            con.exec_driver_sql(f"""
                CREATE OR REPLACE VIEW {VIEW_SCHEMA}.{VIEW_NAME} AS
                SELECT {cast_selects}
                FROM {TBL_SCHEMA}.{TABLE_NAME};
            """)
        else:
            # first creation: define the canonical order explicitly
            con.exec_driver_sql(f"""
                CREATE OR REPLACE VIEW {VIEW_SCHEMA}.{VIEW_NAME} AS
                SELECT
                  year,
                  sector_abbr,
                  county_id,
                  nerc_region_desc,
                  nerc_region_abbr,
                  load_growth_scenario,
                  load_multiplier
                FROM {TBL_SCHEMA}.{TABLE_NAME};
            """)
finally:
    engine.dispose()


In [None]:
# --- DB config ---
# Each value can be overridden with an environment variable so credentials do not have
# to live in the notebook; the defaults are the values that were previously hard-coded.
DB_USER = os.environ.get("DGEN_CLOUD_DB_USER", "postgres")
DB_PASS = os.environ.get("DGEN_CLOUD_DB_PASS", "postgres")
DB_NAME = os.environ.get("DGEN_CLOUD_DB_NAME", "dgendb")
DB_PORT = int(os.environ.get("DGEN_CLOUD_DB_PORT", "5432"))
DB_HOST = os.environ.get("DGEN_CLOUD_DB_HOST", "127.0.0.1")  # Cloud SQL Proxy

# User and password are percent-encoded so characters such as '@', ':' or '/' cannot
# corrupt the connection URL. With the default values the URL is unchanged.
conn_str = (
    f"postgresql+psycopg2://{quote(DB_USER, safe='')}:{quote(DB_PASS, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(conn_str)

# --- Object names ---
TABLE_NAME   = "load_growth_to_model_adjusted"   # base table
VIEW_NAME    = "load_growth_to_model"            # view
VIEW_SCHEMA  = "diffusion_template"
TABLE_SCHEMA = "diffusion_shared"

def _type_sql(r):
    dt = r["data_type"]
    if dt == "character varying":
        l = r["character_maximum_length"]; return f"varchar({l})" if l else "varchar"
    if dt == "character":
        l = r["character_maximum_length"]; return f"char({l})" if l else "char"
    if dt == "numeric":
        p, s = r["numeric_precision"], r["numeric_scale"]
        return f"numeric({p},{s})" if p and s is not None else "numeric"
    if dt in (
        "double precision", "integer", "bigint", "real", "boolean", "text",
        "date", "timestamp without time zone", "timestamp with time zone"
    ):
        return dt
    return r["udt_name"]  # fallback

# Rows per INSERT statement. With method="multi" and no chunksize, pandas writes all
# rows at once, i.e. a single INSERT covering the whole frame.
INSERT_CHUNK_ROWS = 10_000

# Refresh the base table and (re)create the view in a single transaction.
# The engine is disposed in `finally` so its connections are released even if a step fails.
try:
    with engine.begin() as con:
        # Ensure schemas exist
        con.execute(text(f'CREATE SCHEMA IF NOT EXISTS {TABLE_SCHEMA};'))
        con.execute(text(f'CREATE SCHEMA IF NOT EXISTS {VIEW_SCHEMA};'))

        # Does the base table already exist?
        table_exists = con.execute(
            text("""
                SELECT EXISTS (
                  SELECT 1
                  FROM information_schema.tables
                  WHERE table_schema = :schema AND table_name = :table
                )
            """),
            {"schema": TABLE_SCHEMA, "table": TABLE_NAME},
        ).scalar()

        if table_exists:
            # Keep structure + deps; just replace rows
            con.execute(text(f'TRUNCATE {TABLE_SCHEMA}.{TABLE_NAME};'))

        # Existing table: append into the emptied table. First time: create it from the DataFrame.
        load_growth_adjusted.to_sql(
            TABLE_NAME, con=con, schema=TABLE_SCHEMA,
            if_exists="append" if table_exists else "replace",
            index=False, method="multi", chunksize=INSERT_CHUNK_ROWS,
        )

        # Get current view column spec (if the view already exists)
        view_cols = con.execute(
            text("""
                SELECT column_name, data_type, character_maximum_length,
                       numeric_precision, numeric_scale, udt_name
                FROM information_schema.columns
                WHERE table_schema = :schema AND table_name = :view
                ORDER BY ordinal_position
            """),
            {"schema": VIEW_SCHEMA, "view": VIEW_NAME},
        ).mappings().all()

        if view_cols:
            # Preserve existing view column order & types
            cast_selects = ", ".join(
                f'CAST("{c["column_name"]}" AS {_type_sql(c)}) AS "{c["column_name"]}"'
                for c in view_cols
            )
        else:
            # First-time view: derive order and types from the *table*
            table_cols = con.execute(
                text("""
                    SELECT column_name, data_type, character_maximum_length,
                           numeric_precision, numeric_scale, udt_name, ordinal_position
                    FROM information_schema.columns
                    WHERE table_schema = :schema AND table_name = :table
                    ORDER BY ordinal_position
                """),
                {"schema": TABLE_SCHEMA, "table": TABLE_NAME},
            ).mappings().all()

            # Preferred order if you want to enforce specific columns:
            preferred = [
                "year", "sector_abbr", "county_id",
                "nerc_region_desc", "nerc_region_abbr",
                "load_growth_scenario", "load_multiplier",
            ]

            # Use preferred order where available, then append any remaining columns in table order
            table_order = [r["column_name"] for r in table_cols]
            ordered = (
                [c for c in preferred if c in table_order]
                + [c for c in table_order if c not in preferred]
            )

            type_map = {r["column_name"]: _type_sql(r) for r in table_cols}
            cast_selects = ", ".join(
                f'CAST("{c}" AS {type_map.get(c, "text")}) AS "{c}"' for c in ordered
            )

        # Both branches issue the same statement; only the select list differs.
        con.execute(text(f"""
            CREATE OR REPLACE VIEW {VIEW_SCHEMA}.{VIEW_NAME} AS
            SELECT {cast_selects}
            FROM {TABLE_SCHEMA}.{TABLE_NAME};
        """))
finally:
    engine.dispose()