In [None]:
# Standard library
import ast
import base64
import os
from bisect import bisect_left

# Third-party (original relative order kept)
import contextily as ctx
import psycopg2.extras as pgx
import psycopg2 as pg
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import sqlalchemy
from sqlalchemy import create_engine
import pyarrow as pa
import pyarrow.parquet as pq
from IPython.display import HTML

from pylab import *
%matplotlib inline
import geopandas as gpd

In [None]:
# List the diffusion-results schemas present in the database.
# (The query applies no ordering, so the rows come back in whatever order the
# server returns them.)

# Direct psycopg2 connection.
# Settings are read from environment variables named after the standard libpq
# ones (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so credentials do not
# have to be committed with the notebook. The fallbacks are the values this
# notebook used before, so an unconfigured local setup behaves the same.
conn = pg.connect(
    dbname=os.environ.get("PGDATABASE", "dgendb"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "postgres"),
    host=os.environ.get("PGHOST", "127.0.0.1"),
    port=int(os.environ.get("PGPORT", "5432")),
)

query = """
SELECT schema_name
FROM information_schema.schemata
WHERE schema_name LIKE 'diffusion_results_%'
"""

# Use the raw psycopg2 connection; the result has a single `schema_name` column
recent_schemas = pd.read_sql(query, conn)

In [None]:
# Print each schema name returned by the schema query, one per line
for schema in recent_schemas["schema_name"]:
    print(schema)

In [None]:
# Load data
# Load the reference average retail electricity price table from disk
avg_prices = pd.read_csv(filepath_or_buffer="../../../data/average_retail_elec_price.csv")

In [None]:
# Result schemas of the baseline runs, one entry per state
baseline_schemas = [
    "diffusion_results_baseline_nj_2040_20250805_233040000109",
    "diffusion_results_baseline_ar_2040_20250805_232731219143",
    "diffusion_results_baseline_ok_2040_20250805_225123088433",
    "diffusion_results_baseline_id_2040_20250805_225121026559",
    "diffusion_results_baseline_al_2040_20250805_225119515591",
]

# Result schemas of the policy runs, one entry per state
policy_schemas = [
    "diffusion_results_policy_al_2040_20250805_233049343297",
    "diffusion_results_policy_ok_2040_20250805_233047201720",
    "diffusion_results_policy_id_2040_20250805_231009437795",
    "diffusion_results_policy_nj_2040_20250805_225120345675",
    "diffusion_results_policy_ar_2040_20250805_225118724359",
]

# Helper to query and tag results
def load_agent_outputs(schema_name, scenario_label, connection=None):
    """Load one schema's ``agent_outputs`` table and tag every row with its origin.

    Parameters
    ----------
    schema_name : str
        Name of the schema that holds the ``agent_outputs`` table.
    scenario_label : str
        Value written to the ``scenario`` column of every returned row.
    connection : optional
        Connection object handed to ``pd.read_sql``. When omitted, the
        notebook-level ``conn`` is used, which is what callers that pass only
        two arguments get.

    Returns
    -------
    pandas.DataFrame
        Every column of ``agent_outputs`` plus two added columns:
        ``scenario`` (= ``scenario_label``) and ``schema`` (= ``schema_name``).
    """
    if connection is None:
        connection = conn  # notebook-level connection created in an earlier cell

    # Double any embedded double quote so the whole name is always parsed as
    # one quoted identifier and can never terminate the quoting early.
    quoted_schema = schema_name.replace('"', '""')
    query = f'SELECT * FROM "{quoted_schema}".agent_outputs'

    df = pd.read_sql(query, connection)
    df["scenario"] = scenario_label
    df["schema"] = schema_name  # keep the unescaped name for traceability
    return df

# Aggregate all results: every baseline schema first, then every policy schema
all_dfs = []

for scenario_label, schema_list in (
    ("baseline", baseline_schemas),
    ("policy", policy_schemas),
):
    for schema in schema_list:
        print(f"Loading {scenario_label} schema: {schema}")
        all_dfs.append(load_agent_outputs(schema, scenario_label))

# Concatenate all results into one frame with a fresh 0..n-1 index
df = pd.concat(all_dfs, ignore_index=True)

# index=False: the regenerated index carries no information, and writing it
# would come back as a spurious "Unnamed: 0" column when the CSV is re-read.
df.to_csv(
    "../../../data/saved_outputs/test_run_five_states_diffusion_update.csv",
    index=False,
)

In [None]:
# Read the combined agent outputs back from the saved CSV export
df = pd.read_csv(
    filepath_or_buffer="../../../data/saved_outputs/test_run_five_states_diffusion_update.csv"
)

In [None]:
# -- 1. Total bill savings per row: first-year savings times number of adopters
df["total_bill_savings"] = df["first_year_elec_bill_savings"].mul(df["number_of_adopters"])

# -- 2. Share of customers that have adopted by 2040
tech_2040 = df.loc[df["year"] == 2040].copy()

# -- 2a. Sum adopters and customers per state and scenario
tech_agg = tech_2040.groupby(["state_abbr", "scenario"], as_index=False)[
    ["number_of_adopters", "customers_in_bin"]
].sum()

# -- 2b. Adopters as a percentage of customers
tech_agg["percent_tech_potential"] = (
    tech_agg["number_of_adopters"].div(tech_agg["customers_in_bin"]).mul(100)
)

# -- 3. Model-based average electricity price: 2026 rows of the baseline scenario
is_baseline_2026 = (df["year"] == 2026) & (df["scenario"] == "baseline")
model_2026 = df[is_baseline_2026].copy()

# -- 3a. Unweighted mean price per state
avg_prices_model = model_2026.groupby("state_abbr", as_index=False)[
    "avg_elec_price_cents_per_kwh"
].mean()

# -- 3b. Keep only states present in both the model output and the reference table
price_comparison = avg_prices_model.merge(avg_prices, on="state_abbr", how="inner")
# NOTE(review): the model column is scaled by 100 although its name already says
# "cents" — presumably it is actually stored in $/kWh; confirm against the model.
price_comparison["avg_elec_price_cents_per_kwh"] = price_comparison[
    "avg_elec_price_cents_per_kwh"
].mul(100)

# -- 3c. Long format (one row per state and source) for the grouped bar plot
price_melted = pd.melt(
    price_comparison,
    id_vars="state_abbr",
    value_vars=["avg_elec_price_cents_per_kwh", "cents_per_kwh"],
    var_name="Source",
    value_name="Average Price (¢/kWh)",
)

# Replace the raw column names with readable legend labels
source_labels = {
    "avg_elec_price_cents_per_kwh": "Model",
    "cents_per_kwh": "EIA",
}
price_melted["Source"] = price_melted["Source"].map(source_labels)

# -- 4. Aggregate metrics per state / year / scenario

# 4a. Median system size
median_kw = df.groupby(["state_abbr", "year", "scenario"], as_index=False)["system_kw"].median()

# 4b. Total bill savings (row-level values were computed in step 1)
total_bill_savings = df.groupby(["state_abbr", "year", "scenario"], as_index=False)[
    "total_bill_savings"
].sum()

# 4c. Market share reached: adopters relative to customers * max market share
df["market_potential"] = df["customers_in_bin"].mul(df["max_market_share"])
market_share_reached = (
    df.groupby(["state_abbr", "year", "scenario"])
    .agg(
        market_potential=("market_potential", "sum"),
        market_reached=("number_of_adopters", "sum"),
    )
    .reset_index()
)
market_share_reached["market_share_reached"] = market_share_reached["market_reached"].div(
    market_share_reached["market_potential"]
)

# --------------------- PLOTS ---------------------


def _plot_scenario_facets(data, y, y_label, title):
    """Draw and show one small line chart per state, with one line per scenario.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``state_abbr``, ``year``, ``scenario`` and the ``y`` column.
    y : str
        Name of the column plotted on the y-axis.
    y_label : str
        Y-axis label applied to the grid.
    title : str
        Figure-level title.

    Returns
    -------
    seaborn.FacetGrid
        The grid that was drawn.
    """
    grid = sns.FacetGrid(data, col="state_abbr", col_wrap=4, height=3.5, sharey=False)
    grid.map_dataframe(sns.lineplot, x="year", y=y, hue="scenario", marker="o")
    grid.set_titles("{col_name}")
    grid.set_axis_labels("Year", y_label)
    grid.set(xticks=[2026, 2030, 2035, 2040])
    grid.add_legend()
    # `figure` is the supported accessor; `fig` is documented as deprecated.
    grid.figure.suptitle(title, y=1.02)
    plt.tight_layout()
    plt.show()
    return grid


def _label_bars(ax, template, inset=1.5):
    """Write each positive bar's height on the bar.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes whose bar containers are labelled.
    template : str
        ``str.format`` template that receives the bar height.
    inset : float
        Distance, in y-axis data units, between the top of a bar and the top
        of a label drawn inside it.

    Bars taller than ``2 * inset`` get a white label inside the bar, ``inset``
    below the top. Shorter bars get a black label just above the bar instead:
    an inside label there would hang at or below the baseline, where white
    text is unreadable.
    """
    for container in ax.containers:
        for bar in container:
            height = bar.get_height()
            if not height > 0:  # skips zero, negative and NaN heights
                continue
            fits_inside = height > 2 * inset
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                height - inset if fits_inside else height,
                template.format(height),
                ha='center',
                va='top' if fits_inside else 'bottom',
                color='white' if fits_inside else 'black',
                fontsize=9,
                fontweight='bold'
            )


# 1. Median System Size (kW)
g = _plot_scenario_facets(
    median_kw,
    y="system_kw",
    y_label="Median System Size (kW)",
    title="Median PV System Size by Scenario",
)

# 2. Average electricity price: model vs EIA

# -- 2a. State order based on model-estimated price (highest first)
model_sorted = (
    price_melted[price_melted["Source"] == "Model"]
    .sort_values("Average Price (¢/kWh)", ascending=False)
    ["state_abbr"]
    .tolist()
)

# -- 2b. Grouped bars with the sorted x-axis
plt.figure(figsize=(14, 6))
ax = sns.barplot(
    data=price_melted,
    x="state_abbr",
    y="Average Price (¢/kWh)",
    hue="Source",
    order=model_sorted,
    errorbar=None
)

# -- 2c. Value labels on each bar
_label_bars(ax, "{:.1f}")

# -- 2d. Style
plt.title("Average Electricity Price in 2026: Model vs EIA (Sorted by Model)")
plt.ylabel("¢/kWh")
plt.xlabel("State")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# 3. Percent of technical potential reached in 2040

# -- 3a. State order based on the policy scenario percentage (highest first)
policy_sorted = (
    tech_agg[tech_agg["scenario"] == "policy"]
    .sort_values("percent_tech_potential", ascending=False)
    ["state_abbr"]
    .tolist()
)

# -- 3b. Grouped bars using the sorted state order
plt.figure(figsize=(14, 6))
ax = sns.barplot(
    data=tech_agg,
    x="state_abbr",
    y="percent_tech_potential",
    hue="scenario",
    order=policy_sorted,
    errorbar=None
)

# -- 3c. Percentage labels on each bar
_label_bars(ax, "{:.1f}%")

# -- 3d. Labels and styling
plt.title("Solar Technical Potential Reached in 2040 (Sorted by Policy %)")
plt.ylabel("Percent of Technical Potential (%)")
plt.xlabel("State")
plt.xticks(rotation=45)
plt.ylim(0, tech_agg["percent_tech_potential"].max() * 1.1)
plt.tight_layout()
plt.show()

# 4. Total Bill Savings by Scenario
g = _plot_scenario_facets(
    total_bill_savings,
    y="total_bill_savings",
    y_label="Total First-Year Bill Savings ($)",
    title="Total First-Year Bill Savings by Scenario",
)

# 5. Summed metrics by state: column name -> axis label
metrics = {
    "number_of_adopters": "Number of Adopters",
    "system_kw_cum": "Cumulative Deployment (kW)",
    "batt_kwh_cum": "Cumulative Battery Size (kWh)"
}

for col, label in metrics.items():
    # Sum the metric per state / year / scenario
    grouped = df.groupby(["state_abbr", "year", "scenario"])[col].sum().reset_index()

    # Faceted line plot
    g = _plot_scenario_facets(
        grouped,
        y=col,
        y_label=label,
        title=f"{label} by Scenario (Baseline vs Policy)",
    )


In [None]:
# Ratio of each row's system_kw to its max_system_kw
df['prop_system_size'] = df['system_kw'].div(df['max_system_kw'])

# Sum of initial_number_of_adopters per state for the 2026 policy rows
# (left as the last expression so it renders as the cell output)
is_policy_2026 = (df['year'] == 2026) & (df['scenario'] == "policy")
df.loc[is_policy_2026].groupby(['state_abbr'], as_index=False).agg({'initial_number_of_adopters': 'sum'})