In [None]:
import ast
import contextily as ctx
import psycopg2.extras as pgx
import psycopg2 as pg
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

import ast
from bisect import bisect_left

import sqlalchemy
from sqlalchemy import create_engine
import pyarrow as pa
import pyarrow.parquet as pq
from IPython.display import HTML
import base64

from pylab import *
%matplotlib inline
import geopandas as gpd

In [None]:
# Open a SQLAlchemy connection to the local Postgres database named by `x`.
# NOTE(review): credentials and host are hard-coded in the URL, and `con` is
# never closed in this cell — consider environment variables and a context manager.
x = 'dgen_db'
db_url_template = "postgresql+psycopg2://postgres:postgres@127.0.0.1:5432/{0}"
engine = create_engine(db_url_template.format(x))
con = engine.connect()

# Result schemas to compare (one baseline run, one policy run)
baseline_schema = 'diffusion_results_baseline_20250717_103511255149'
policy_schema = 'diffusion_results_dollar_per_watt_20250717_103809034877'

# Query template: pull every column of agent_outputs from the given schema
sql = """
SELECT * FROM {schema}.agent_outputs
"""

# Read the baseline table first, then the policy table, over the same connection
baseline, policy = (
    pd.read_sql(sql.format(schema=schema_name), con)
    for schema_name in (baseline_schema, policy_schema)
)

# Cambium emissions data is read from a CSV relative to the notebook location
cambium = pd.read_csv("../../../data/cambium.csv")

In [None]:
# Build a per-year emissions-factor lookup by interpolating the NREL Cambium table.

# Inclusive range of years kept in the final lookup
start_year, end_year = 2026, 2050

# Year columns to materialise before interpolating (2025 through 2050)
years_full = list(range(2025, 2051))

# Wide layout: one row per (state, month, hour), one column per value of `t`.
# Reindexing adds an all-NaN column for every year in years_full that is absent.
camb_pivot = cambium.pivot_table(
    index=['state', 'month', 'hour'],
    columns='t',
    values='lrmer_co2e',
)
camb_pivot = camb_pivot.reindex(columns=years_full)

# Linear interpolation across the year columns; limit_area='inside' only fills
# gaps that have valid values on both sides, so leading/trailing NaNs remain NaN.
camb_interp = camb_pivot.interpolate(axis=1, limit_area='inside')

# Back to long layout, then keep only the years inside the analysis window
camb_long = camb_interp.reset_index().melt(
    id_vars=['state', 'month', 'hour'],
    var_name='year',
    value_name='lrmer_co2e',
)
camb_long = camb_long.query('year >= @start_year and year <= @end_year')

# Dictionary keyed by (state, year, month, hour) -> lrmer_co2e
lookup_keys = ['state', 'year', 'month', 'hour']
lrmer_lookup = camb_long.set_index(lookup_keys)['lrmer_co2e'].to_dict()

In [None]:
# Assume baseline and policy DataFrames are already loaded:
scenarios = {'Baseline': baseline, 'Policy': policy}

# 1) Cumulative installations, bill savings, and deployment in MW
fig, axes = plt.subplots(3, 1, figsize=(8, 12), sharex=True)

for name, df in scenarios.items():
    # Weight each agent's first-year bill savings by its number of new adopters.
    # The product is stored on a small derived frame rather than written back
    # into `df`: overwriting the scenario column would compound the weighting
    # every time this cell is re-run and would leak into any later use of it.
    weighted = df[['year', 'new_adopters', 'system_kw_cum']].assign(
        weighted_bill_savings=df['first_year_elec_bill_savings'] * df['new_adopters']
    )

    # Aggregate annual metrics
    annual = weighted.groupby('year').agg(
        installations=('new_adopters', 'sum'),
        bill_savings=('weighted_bill_savings', 'sum'),
        deployment_kw=('system_kw_cum', 'sum'),
    )

    # Compute cumulative series.
    # system_kw_cum is only summed and converted kW -> MW (no cumsum) —
    # presumably the column is already cumulative; TODO confirm.
    annual['cum_installations'] = annual['installations'].cumsum()
    annual['cum_bill_savings'] = annual['bill_savings'].cumsum()
    annual['cum_deployment_mw'] = annual['deployment_kw'] / 1000.0

    # Plot one line per scenario on each of the three stacked panels
    axes[0].plot(annual.index, annual['cum_installations'], label=name)
    axes[1].plot(annual.index, annual['cum_bill_savings'], label=name)
    axes[2].plot(annual.index, annual['cum_deployment_mw'], label=name)

axes[0].set_ylabel('Cumulative Installations')
axes[1].set_ylabel('Cumulative Bill Savings ($)')
axes[2].set_ylabel('Cumulative Deployment (MW)')
axes[2].set_xlabel('Year')
for ax in axes:
    ax.grid(True)
    ax.legend()
plt.tight_layout()
plt.show()

# 2) Yearly count of agents whose NPV is positive, one line per scenario
npv_fig, npv_ax = plt.subplots(figsize=(8, 4))
for label, frame in scenarios.items():
    # Keep rows with npv > 0, then count non-null bldg_id values within each year
    positive_rows = frame.loc[frame['npv'] > 0]
    yearly_counts = positive_rows.groupby('year')['bldg_id'].count()
    npv_ax.plot(yearly_counts.index, yearly_counts.values, marker='o', label=label)
npv_ax.set_ylabel('Positive NPV Agents')
npv_ax.set_xlabel('Year')
npv_ax.grid(True)
npv_ax.legend()
npv_fig.tight_layout()
plt.show()

# 3) Median payback period per year, one line per scenario
payback_fig, payback_ax = plt.subplots(figsize=(8, 4))
for label, frame in scenarios.items():
    # Median of payback_period across all rows sharing the same year
    yearly_median = frame.groupby('year')['payback_period'].median()
    payback_ax.plot(yearly_median.index, yearly_median.values, marker='o', label=label)
payback_ax.set_ylabel('Median Payback Period (yrs)')
payback_ax.set_xlabel('Year')
payback_ax.grid(True)
payback_ax.legend()
payback_fig.tight_layout()
plt.show()

# 4) Cumulative GHG emissions savings
# Assumes lrmer_lookup is defined as dict[(state, year, month, hour)] -> kg/MWh

def compute_cumulative_avoided(df, lrmer_lookup):
    """Return the running total of avoided emissions (metric tons) by year.

    For every row, the hourly generation profile is scaled by ``system_kw`` and
    ``new_adopters``, the adopter-weighted battery dispatch profile is
    subtracted, and the result is multiplied hour-by-hour with the emissions
    factor for the row's state, year, month and hour. Annual totals are then
    accumulated across years.

    Unlike the previous version, ``df`` is NOT modified: hourly profiles are
    parsed on the fly, so the function can be called repeatedly on the same
    frame (e.g. when the notebook cell is re-run).

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``year``, ``state_abbr``, ``new_adopters``,
        ``system_kw``, ``generation_hourly_list`` and
        ``batt_dispatch_profile_list``. The two profile columns may hold
        either stringified Python lists or already-parsed sequences.
    lrmer_lookup : dict
        Maps ``(state, year, month, hour)`` to an emissions factor in kg/MWh.

    Returns
    -------
    pandas.Series
        ``cum_avoided_tons`` indexed by ``year``; empty when ``df`` has no rows.

    Raises
    ------
    KeyError
        If ``lrmer_lookup`` has no entry for a required (state, year, month, hour).
    ValueError
        If a generation profile does not contain exactly 8760 hourly values.
    """
    hours_per_year = 8760  # every year is treated as 8760 hours starting Jan 1 00:00

    def _profile(value):
        """Turn a stored hourly profile (string or sequence) into a float array."""
        if isinstance(value, str):
            value = ast.literal_eval(value)
        return np.asarray(value, dtype=float)

    records = []
    for year, group in df.groupby('year'):
        # Month/hour of each of the 8760 hourly steps of this year
        dtidx = pd.date_range(f'{int(year)}-01-01', periods=hours_per_year, freq='h')
        month_hour = list(zip(dtidx.month, dtidx.hour))

        # The factor array depends only on (state, year): build it once per
        # state instead of repeating 8760 dict lookups for every row.
        factors_by_state = {}
        total_avoided = 0.0

        rows = zip(
            group['state_abbr'],
            group['new_adopters'],
            group['system_kw'],
            group['generation_hourly_list'],
            group['batt_dispatch_profile_list'],
        )
        for state, w, system_kw, raw_gen, raw_batt in rows:
            gen = _profile(raw_gen) * system_kw * w
            if gen.size != hours_per_year:
                raise ValueError(
                    f'generation_hourly_list must hold {hours_per_year} values, '
                    f'got {gen.size} (state={state!r}, year={year})'
                )

            # Battery profile may be empty/mismatched; treat that as no dispatch
            batt_arr = _profile(raw_batt)
            if batt_arr.size != gen.size:
                batt_arr = np.zeros_like(gen)
            avoided = gen - batt_arr * w

            fac = factors_by_state.get(state)
            if fac is None:
                # kg/MWh -> kg/kWh
                fac = np.array(
                    [lrmer_lookup[(state, year, m, h)] for m, h in month_hour],
                    dtype=float,
                ) / 1000.0
                factors_by_state[state] = fac

            total_avoided += (avoided * fac).sum() / 1000.0  # kg -> metric tons
        records.append({'year': year, 'avoided_tons': total_avoided})

    if not records:
        # No rows at all: return an empty series with the usual name and index
        return pd.Series(dtype=float, name='cum_avoided_tons',
                         index=pd.Index([], name='year'))

    ann = pd.DataFrame(records).set_index('year')
    ann['cum_avoided_tons'] = ann['avoided_tons'].cumsum()
    return ann['cum_avoided_tons']

# Plot the cumulative avoided-emissions series of every scenario on one axis
ghg_fig, ghg_ax = plt.subplots(figsize=(8, 4))
for label, frame in scenarios.items():
    avoided_series = compute_cumulative_avoided(frame, lrmer_lookup)
    ghg_ax.plot(avoided_series.index, avoided_series.values, marker='o', label=label)
ghg_ax.set_ylabel('Cumulative CO₂e Avoided (tons)')
ghg_ax.set_xlabel('Year')
ghg_ax.grid(True)
ghg_ax.legend()
ghg_fig.tight_layout()
plt.show()
