In [None]:
# Adjust the default PV and battery price trajectories for our baseline and dollar-per-watt scenarios, then upload the resulting tables to the Cloud SQL instance.

In [None]:
# Imports
import sys, os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

sys.path.append(os.path.abspath(".."))

from input_data_functions import stacked_sectors

In [None]:
# Read the large LBNL public CSV into a DataFrame
lbnl_csv_path = '../../../data/TTS_LBNL_PUBLIC_FILE_21-Aug-2024_all.csv'
lbnl = pd.read_csv(lbnl_csv_path)

In [None]:
# Read the default price trajectories (PV, battery, PV+battery) and the inflation multipliers
input_paths = {
    'pv': '../../input_data/pv_prices/pv_price_atb23_mid.csv',
    'batt': '../../input_data/batt_prices/batt_prices_FY23_mid.csv',
    'pv_batt': '../../input_data/pv_plus_batt_prices/pv_plus_batt_prices_FY23_mid.csv',
    'inflation': '../../../data/inflation_multipliers.csv',
}
pv = pd.read_csv(input_paths['pv'])
batt = pd.read_csv(input_paths['batt'])
pv_batt = pd.read_csv(input_paths['pv_batt'])
inflation = pd.read_csv(input_paths['inflation'])

In [None]:
# Select the LBNL records of interest: residential PV / storage systems that report
# a positive installed price and a positive size for at least one of PV or battery.
relevant_tech = lbnl['technology_type'].isin(['pv-only', 'pv+storage', 'storage-only'])
residential = lbnl['customer_segment'] == 'RES'
has_price = lbnl['total_installed_price'] > 0
has_size = (lbnl['PV_system_size_DC'] > 0) | (lbnl['battery_rated_capacity_kWh'] > 0)
lbnl_filt = lbnl.loc[relevant_tech & residential & has_price & has_size].copy()

# Parse the installation date (e.g. "21-Aug-2024"); unparseable values become NaT
install_date = pd.to_datetime(lbnl_filt['installation_date'], format="%d-%b-%Y", errors='coerce')

# Store the parsed date together with its year / month / day components
lbnl_filt = lbnl_filt.assign(
    parsed_date=install_date,
    year=install_date.dt.year,
    month=install_date.dt.month,
    day=install_date.dt.day,
)

# Keep installs from 2024 onward (rows whose date failed to parse have a NaN year and drop out here)
lbnl_filt = lbnl_filt.loc[lbnl_filt['year'] >= 2024]

# Attach the inflation multiplier for each install's year and month
lbnl_inflation = pd.merge(lbnl_filt, inflation, how='left', on=['year', 'month'])

# Inflation-adjust both price columns; rows without a matching multiplier become NaN
for price_col in ('total_installed_price', 'battery_price'):
    lbnl_inflation[price_col] = lbnl_inflation[price_col] * lbnl_inflation['inflation_multiplier']

# Build the per-unit price samples used for the medians (records were limited to installs from 2024 onward above)

# PV price per kW: for 'pv-only' and 'pv+storage' systems, use the total_installed_price.
# NOTE(review): for 'pv+storage' rows total_installed_price presumably also covers the battery — confirm this is intended.
solar_rows = (
    lbnl_inflation['technology_type'].isin(['pv-only', 'pv+storage']) &
    # The upstream size filter accepts a row when EITHER the PV size OR the battery capacity is
    # positive, so the PV size can still be zero or negative here. Require a strictly positive
    # size (this also excludes NaN) so the division below cannot produce inf or negative prices.
    (lbnl_inflation['PV_system_size_DC'] > 0) &
    lbnl_inflation['total_installed_price'].notna()
)
# .copy() so that adding a column below modifies an independent frame, not a view of lbnl_inflation
price_per_kw_solar = lbnl_inflation.loc[solar_rows, ['PV_system_size_DC', 'total_installed_price', 'state']].copy()
price_per_kw_solar['price_per_kw'] = price_per_kw_solar['total_installed_price'] / price_per_kw_solar['PV_system_size_DC']

# Battery price per kWh for 'storage-only' and 'pv+storage' systems:
# use battery_price for pv+storage, and total_installed_price for storage-only.
battery_capacity = lbnl_inflation['battery_rated_capacity_kWh']

# Storage-only: the upstream size filter accepts a row when EITHER the PV size OR the battery
# capacity is positive, so require a positive capacity here as well (as the pv+storage branch
# does) to keep zero / negative / missing capacities out of the division below.
storage_only_rows = lbnl_inflation['technology_type'].isin(['storage-only']) & (battery_capacity > 0)
price_per_kwh_battery_only = lbnl_inflation.loc[
    storage_only_rows, ['battery_rated_capacity_kWh', 'total_installed_price']
].copy()

# PV+storage: only rows that report both a positive capacity and a positive battery price
pv_storage_rows = (
    lbnl_inflation['technology_type'].isin(['pv+storage']) &
    (battery_capacity > 0) &
    (lbnl_inflation['battery_price'] > 0)
)
price_per_kwh_pv_battery = lbnl_inflation.loc[
    pv_storage_rows, ['battery_rated_capacity_kWh', 'battery_price']
].copy()
# Expose the battery price under the shared column name so both samples can be stacked
price_per_kwh_pv_battery['total_installed_price'] = price_per_kwh_pv_battery['battery_price']

battery_cols = ['battery_rated_capacity_kWh', 'total_installed_price']
price_per_kwh_battery = pd.concat(
    [price_per_kwh_battery_only[battery_cols], price_per_kwh_pv_battery[battery_cols]],
    ignore_index=True,
)
price_per_kwh_battery['price_per_kwh'] = price_per_kwh_battery['total_installed_price'] / price_per_kwh_battery['battery_rated_capacity_kWh']

In [None]:
# Medians of the per-kW PV prices and per-kWh battery prices derived from the LBNL records
pv_unit_prices = price_per_kw_solar['price_per_kw']
batt_unit_prices = price_per_kwh_battery['price_per_kwh']

median_price_pv_per_kw = pv_unit_prices.median()
median_price_batt_per_kwh = batt_unit_prices.median()

In [None]:
# --- Policy prices: fixed 2026 starting prices with a constant annual decline ---

POLICY_START_YEAR = 2026
POLICY_END_YEAR = 2050
years = np.arange(POLICY_START_YEAR, POLICY_END_YEAR + 1)  # 2026..2050 inclusive

# Starting (2026) prices are fixed scenario assumptions; they are NOT derived from the LBNL medians.
# NOTE(review): units are presumably $/kW (PV) and $/kWh (battery), since these values later
# overwrite the *_capex_per_kw_res / *_capex_per_kwh_res columns — confirm.
start_pv   = 1000
start_batt = 800

# Year-over-year price multipliers: PV declines 2% per year, batteries decline 5% per year.
PV_ANNUAL_PRICE_FACTOR = 0.98
BATT_ANNUAL_PRICE_FACTOR = 0.95

# Compound the decline for each year after the start year (exponent 0 in 2026 leaves the start price unchanged)
years_since_start = years - POLICY_START_YEAR
prices_pv   = start_pv   * (PV_ANNUAL_PRICE_FACTOR ** years_since_start)
prices_batt = start_batt * (BATT_ANNUAL_PRICE_FACTOR ** years_since_start)

price_df = pd.DataFrame({
    "year": years,
    "price_pv": prices_pv,
    "price_batt": prices_batt
})

In [None]:
# --- Baseline ---

BASELINE_ANCHOR_YEAR = 2026  # year whose trajectory value is pinned to the LBNL median; also the first year kept


def _rescale_to_anchor(traj, column, anchor_value, anchor_year=BASELINE_ANCHOR_YEAR):
    """Return traj[column] scaled so that its value in anchor_year equals anchor_value.

    The whole column is multiplied by a single constant, so the relative shape of
    the trajectory is preserved.

    Raises:
        ValueError: if traj has no row for anchor_year.
    """
    anchor_rows = traj.loc[traj['year'] == anchor_year, column]
    if anchor_rows.empty:
        raise ValueError(f"No {anchor_year} row found when rescaling '{column}'")
    return traj[column] * (anchor_value / anchor_rows.iloc[0])


# PV: rescale each residential capex trajectory so its 2026 value equals the LBNL median price per kW
pv['system_capex_per_kw_res'] = _rescale_to_anchor(pv, 'system_capex_per_kw_res', median_price_pv_per_kw)
pv_batt['system_capex_per_kw_res'] = _rescale_to_anchor(pv_batt, 'system_capex_per_kw_res', median_price_pv_per_kw)

# Batteries: same approach with the LBNL median price per kWh. Each table is rescaled from its
# OWN trajectory (the standalone battery table was previously derived from the pv+battery column).
batt['batt_capex_per_kwh_res'] = _rescale_to_anchor(batt, 'batt_capex_per_kwh_res', median_price_batt_per_kwh)
pv_batt['batt_capex_per_kwh_res'] = _rescale_to_anchor(pv_batt, 'batt_capex_per_kwh_res', median_price_batt_per_kwh)

# Keep only the anchor year onward; .copy() gives independent frames rather than slices of the originals
pv = pv[pv['year'] >= BASELINE_ANCHOR_YEAR].copy()
pv_batt = pv_batt[pv_batt['year'] >= BASELINE_ANCHOR_YEAR].copy()
batt = batt[batt['year'] >= BASELINE_ANCHOR_YEAR].copy()

# --- Dollar per watt ---

# Start from copies of the baseline tables so the baseline frames are left untouched
pv_dollar_per_watt = pv.copy().reset_index(drop=True)
batt_dollar_per_watt = batt.copy().reset_index(drop=True)
pv_batt_dollar_per_watt = pv_batt.copy().reset_index(drop=True)

# Look the policy prices up BY YEAR rather than by row position, so each row receives the price
# for its own year even if a trajectory skips years or is not sorted. Years that are absent
# from price_df receive NaN.
policy_pv_by_year = price_df.set_index('year')['price_pv']
policy_batt_by_year = price_df.set_index('year')['price_batt']

# Overwrite the residential capex columns with the policy prices
pv_dollar_per_watt['system_capex_per_kw_res'] = pv_dollar_per_watt['year'].map(policy_pv_by_year)
pv_batt_dollar_per_watt['system_capex_per_kw_res'] = pv_batt_dollar_per_watt['year'].map(policy_pv_by_year)
batt_dollar_per_watt['batt_capex_per_kwh_res'] = batt_dollar_per_watt['year'].map(policy_batt_by_year)
pv_batt_dollar_per_watt['batt_capex_per_kwh_res'] = pv_batt_dollar_per_watt['year'].map(policy_batt_by_year)

# Run every table through stacked_sectors (the same function dGen applies) so the data is in the proper format

# Baseline tables
pv, pv_batt, batt = [stacked_sectors(frame) for frame in (pv, pv_batt, batt)]

# Dollar-per-watt tables
pv_dollar_per_watt, pv_batt_dollar_per_watt, batt_dollar_per_watt = [
    stacked_sectors(frame)
    for frame in (pv_dollar_per_watt, pv_batt_dollar_per_watt, batt_dollar_per_watt)
]

In [None]:
# Upload new DFs to Cloud SQL Instance

# Connection config: read from DGEN_DB_* environment variables so real credentials do not have
# to be committed with the notebook. The fallbacks reproduce the previous hard-coded values.
DB_USER = os.environ.get("DGEN_DB_USER", "postgres")
DB_PASS = os.environ.get("DGEN_DB_PASS", "postgres")
DB_NAME = os.environ.get("DGEN_DB_NAME", "dgendb")
DB_PORT = int(os.environ.get("DGEN_DB_PORT", "5432"))
DB_HOST = os.environ.get("DGEN_DB_HOST", "127.0.0.1")  # local Cloud SQL Proxy

# Build SQLAlchemy connection string; user and password are URL-escaped so characters
# such as '@' or ':' in a credential cannot corrupt the URL
conn_str = (
    f"postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASS)}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Create engine
engine = create_engine(conn_str)

# Target table name -> DataFrame, in upload order: baseline tables first, then dollar-per-watt tables
tables_to_upload = {
    "pv_price_baseline": pv,
    "pv_plus_batt_baseline": pv_batt,
    "batt_prices_baseline": batt,
    "pv_price_dollar_per_watt": pv_dollar_per_watt,
    "pv_plus_batt_dollar_per_watt": pv_batt_dollar_per_watt,
    "batt_prices_dollar_per_watt": batt_dollar_per_watt,
}

# Replace the relevant tables in the database; always release the engine's connections,
# even if one of the uploads fails
try:
    for table_name, frame in tables_to_upload.items():
        frame.to_sql(table_name, engine, schema="diffusion_shared", if_exists="replace", index=False)
finally:
    engine.dispose()