In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load inputs.
# NOTE: the .pkl inputs are deserialised with pickle, which can execute
# arbitrary code -- only load files that come from a trusted source.

# Pickled inputs: the agent table and the county wholesale price lookup
df = pd.read_pickle("../../../data/agent_df_base_res_national_revised_v2.pkl")
cty_prices = pd.read_pickle("../../../data/stanford_files/data_county_to_wholesale.pkl")

# CSV inputs
load = pd.read_csv("../../../data/load_gwh.csv")
households = pd.read_csv("../../../data/household_counts.csv")
rates = pd.read_csv("../../../data/residential_agents_updated.csv")
# Identifier columns are read as text rather than parsed as numbers
county_map = pd.read_csv(
    "../../../data/dgen_county_fips_mapping.csv",
    dtype={"geoid10": "str", "county_id": "str"},
)


In [None]:
# Move the index into a regular column (reset_index keeps it by default)
# so the agent id travels with the rows through the merges below.
df = df.reset_index()

In [None]:
# Join EIA state-level load data and census housing counts to the agent file.
# Each lookup table must hold exactly one row per state_abbr: a repeated
# state would silently duplicate every agent in that state, so
# validate="many_to_one" makes pandas raise a MergeError instead.
# These remain inner joins, so agents whose state is missing from either
# table are dropped from df_eia.
df_eia = (
    df
    .merge(load, on="state_abbr", validate="many_to_one")
    .merge(households, on="state_abbr", validate="many_to_one")
)

# Force the load and count columns to a numeric dtype before doing any
# arithmetic on them; values that cannot be parsed become NaN.
cols_to_numeric = [
    "load_gwh",
    "households",
    "customers_in_bin_initial",
    "load_kwh_per_customer_in_bin_initial",
    "load_kwh_in_bin_initial",
]

df_eia = df_eia.assign(
    **{name: pd.to_numeric(df_eia[name], errors="coerce") for name in cols_to_numeric}
)

In [None]:
# First update housing counts and loads so they match the state-level totals.
#
# NOTE: this cell overwrites customers_in_bin_initial and
# load_kwh_per_customer_in_bin_initial, so it is not idempotent. Rebuild
# df_eia (re-run the join cell) before running it a second time.

KWH_PER_GWH = 1_000_000

# Preserve original load: the per-customer intensity is the allocation weight
# used further down, and the column itself is overwritten at the end.
df_eia["orig_per_cust"] = df_eia["load_kwh_per_customer_in_bin_initial"]

# --- Housing counts ---
state_sum_customers = df_eia.groupby("state_abbr")["customers_in_bin_initial"].transform("sum")
# A state with zero customers would divide by zero; NaN propagates and is
# zero-filled afterwards.
nonzero_state_sum_customers = state_sum_customers.replace(0, np.nan)

# Customer-weighted developable share of each state. It has to be computed
# from the *original* customer counts, i.e. before they are rescaled below.
state_developable_share = (
    (df_eia["customers_in_bin_initial"] * df_eia["pct_of_bldgs_developable"])
    .groupby(df_eia["state_abbr"])
    .transform("sum")
    / nonzero_state_sum_customers
)

# Scale each bin so that a state's customers sum to
# households * (customer-weighted developable share).
scale = (df_eia["households"] * df_eia["pct_of_bldgs_developable"]) / nonzero_state_sum_customers

df_eia["customers_in_bin_initial"] = (
    df_eia["customers_in_bin_initial"] * scale
).fillna(0)

# Derate the EIA state-wide loads based on pct of buildings developable
# (customers_in_bin already reflects the percentage of developable buildings).
# This column is a per-row figure because pct_of_bldgs_developable is per-row.
df_eia["load_kwh"] = df_eia["load_gwh"] * df_eia["pct_of_bldgs_developable"] * KWH_PER_GWH

# State customer totals *after* scaling, with 0 mapped to NaN to avoid div/0.
# Not used by the allocation below; kept as a notebook-level name.
state_tot_cust = df_eia.groupby("state_abbr")["customers_in_bin_initial"].transform("sum")
state_tot_cust = state_tot_cust.replace(0, np.nan)

# Build weights that preserve within-state intensity differences
weights = df_eia["customers_in_bin_initial"] * df_eia["orig_per_cust"]

state_weight_sums = weights.groupby(df_eia["state_abbr"]).transform("sum").replace(0, np.nan)

# State-level load after derating by developable share.
# The previous transform("first") picked the load_kwh of whichever agent came
# first in the state, so the state total depended on row order whenever
# pct_of_bldgs_developable differs between agents. Using the customer-weighted
# share is order-independent, gives the same value when the share is constant
# within a state, and derates load by the same factor as the customer counts.
state_load_totals = df_eia["load_gwh"] * KWH_PER_GWH * state_developable_share

# Allocation that preserves mix, then rescales to the state total
df_eia["allocated_load_kwh"] = (weights / state_weight_sums) * state_load_totals

# Per-customer and per-bin recomputes
df_eia["load_kwh_per_customer_in_bin_initial"] = (
    df_eia["allocated_load_kwh"] / df_eia["customers_in_bin_initial"]
)

df_eia["load_kwh_in_bin_initial"] = (
    df_eia["customers_in_bin_initial"] * df_eia["load_kwh_per_customer_in_bin_initial"]
)

# Clean up any divisions by zero: 0/0 yields NaN and x/0 yields +/-inf, and
# fillna alone would leave the infinities in place.
recomputed_cols = [
    "allocated_load_kwh",
    "load_kwh_per_customer_in_bin_initial",
    "load_kwh_in_bin_initial",
]
df_eia[recomputed_cols] = (
    df_eia[recomputed_cols]
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

In [None]:
# Next update rates: swap in the new tariff_dict column, matched on bldg_id.
# Exact duplicate (bldg_id, tariff_dict) rows carry no extra information and
# would multiply agent rows in the join, so they are removed first.
# validate="many_to_one" then raises a MergeError if a bldg_id still maps to
# more than one tariff, instead of silently duplicating agents.
# This remains an inner join: agents without a matching bldg_id are dropped.
# NOTE(review): rates comes from read_csv, so tariff_dict arrives as text --
# confirm downstream code parses it if it expects a dict.
new_tariffs = rates[["bldg_id", "tariff_dict"]].drop_duplicates()
agents_rates = (
    df_eia
    .drop(columns="tariff_dict")
    .merge(new_tariffs, on="bldg_id", validate="many_to_one")
)

In [None]:
# Next update wholesale prices

# geoid10 county codes are five characters wide (e.g. '46113'). Zero-padding
# both sides of the join keeps codes with a leading zero from failing to match
# when one side was stored as an integer.
FIPS_WIDTH = 5

# Convert the county -> wholesale price mapping to a df for the join
prices_df = pd.DataFrame(
    [
        (str(k).zfill(FIPS_WIDTH), np.asarray(v, dtype=float))
        for k, v in cty_prices.items()
    ],
    columns=["geoid10", "wholesale_prices"]
)

# Change agent df county_id column to str so it matches county_map's dtype
agents_rates['county_id'] = agents_rates['county_id'].astype(str)

# Normalise the mapping's county codes to the same zero-padded width
county_map['geoid10'] = county_map['geoid10'].str.zfill(FIPS_WIDTH)

# Change Oglala Lakota County fips code for join (46113 -> 46102)
county_map['geoid10'] = np.where(county_map['geoid10'] == '46113', '46102', county_map['geoid10'])

# Merge prices onto the county map. Exact duplicate mapping rows are dropped
# first; counties without a price keep NaN (left join).
county_keys = county_map[['geoid10', 'county_id']].drop_duplicates()
county_prices = county_keys.merge(
    prices_df, on=['geoid10'], how="left", validate="many_to_one"
)

# Merge the prices to the agent df. Agents with no matching county keep NaN.
# validate="many_to_one" raises a MergeError if a county_id appears more than
# once in the lookup, which would otherwise duplicate agent rows.
agents_prices = agents_rates.merge(
    county_prices[['county_id', 'wholesale_prices']],
    on=['county_id'],
    how='left',
    validate="many_to_one",
)

In [None]:
# Prepare the agent file for writing: index the final frame by agent_id
# (the column is moved into the index, not kept as a regular column).
df_to_write = agents_prices.set_index(keys="agent_id")

In [None]:
# Persist the updated agent table as a pickle
df_to_write.to_pickle(
    "../../input_agents/agent_df_base_res_national_updated_wholesale_prices.pkl"
)

In [None]:
# Check customer-weighted average load per customer, by state.
#
# Computed as sum(load_per_customer * customers) / sum(customers) per state.
# The earlier groupby().apply(np.average, weights=...) form raises
# ZeroDivisionError for any state whose customer weights sum to zero (which
# the zero-filling upstream can produce); here such a state yields NaN.
# Missing values are skipped by the grouped sums rather than propagated.
weighted_load = (
    df_to_write["load_kwh_per_customer_in_bin_initial"]
    * df_to_write["customers_in_bin_initial"]
)
state_weighted_load = weighted_load.groupby(df_to_write["state_abbr"]).sum()
state_customers = df_to_write.groupby("state_abbr")["customers_in_bin_initial"].sum()

avg = (
    (state_weighted_load / state_customers.replace(0, np.nan))
    .reset_index(name="weighted_avg_load_kwh_per_customer")
)