In [1]:
from databricks.sdk import WorkspaceClient
# Databricks workspace client, constructed with no explicit arguments.
w = WorkspaceClient()

# Restart the Python process through the workspace dbutils handle.
# NOTE(review): presumably here so freshly installed libraries are picked up
# before the imports in the next cell — confirm it is still required.
w.dbutils.library.restartPython()

In [2]:
from databricks.connect import DatabricksSession
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window as W
from datetime import datetime, date
import pandas as pd
import polars as pl

# Databricks Connect session used for every `spark.table(...)` read below.
spark = DatabricksSession.builder.getOrCreate()

# Load the PPA details from a CSV (path is relative to the working directory)
# and mirror them into a Spark DataFrame.
# NOTE(review): `ppa_details` is not referenced in the cells visible here —
# confirm it is still needed.
ppa_details_df = pd.read_csv("utils/ppa_details.csv")
ppa_details = spark.createDataFrame(ppa_details_df)

In [3]:
from utils.extract import extract_deal_info
from utils.transform import clean_and_aggregate_earnings

# Reporting window passed as `start_date` / `end_date` to the aggregation
# calls below. The P90/P10 cell overrides `start_date` with its own value.
start_date = "2025-01-01"
end_date = "2026-12-31"

# Deal information handed to every `clean_and_aggregate_earnings` call as
# `deal_info_df`.
sim_complete_deal_info = extract_deal_info()

# Hindcast

In [4]:
# Aggregate the 2024 hindcast earnings table over the reporting window,
# labelled with the curve name "2024_hindcast".
hindcast_df = spark.table("exploration.default.earnings_demand_model_2024_hindcast")
hindcast_earnings = clean_and_aggregate_earnings(
    df=hindcast_df,
    deal_info_df=sim_complete_deal_info,
    curve_name="2024_hindcast",
    start_date=start_date,
    end_date=end_date
)

# Seed the accumulator; later cells extend it with `unionByName`, one curve
# at a time.
combined_earnings = hindcast_earnings

# Spiked demand model output
### TODO: an extra step is needed before this section — ideally automate the generation/update of the spiked-model tables. The cells below only consume the existing tables.

In [5]:
# Model runs to include; each id is the suffix of an
# `exploration.default.earnings_demand_model_<id>` table read in the next cell.
# Full variant list, kept for reference (swap in to include the other runs):
# model_ids = ["20250320_spiked", "20250320_50spiked", "20250320_150spiked"]
model_ids = ["20250320_spiked"]

In [6]:
# Rebuild the accumulator from the hindcast baseline before adding the model
# runs. On a fresh top-to-bottom run `combined_earnings` already equals
# `hindcast_earnings` at this point, so the result is unchanged; the reset
# makes the cell idempotent, so re-running it no longer appends the same
# model rows a second time.
combined_earnings = hindcast_earnings

for model_id in model_ids:
    # One earnings table per model run, named by the model id suffix.
    model_df = spark.table(f"exploration.default.earnings_demand_model_{model_id}")
    model_earnings = clean_and_aggregate_earnings(
        df=model_df,
        deal_info_df=sim_complete_deal_info,
        curve_name=model_id,
        start_date=start_date,
        end_date=end_date,
        is_cap_adjusted=True
    )

    combined_earnings = combined_earnings.unionByName(model_earnings)

In [7]:
# Single source of truth for the spiked-model table used throughout this cell.
spiked_table_name = "exploration.default.earnings_demand_model_20250320_spiked"

# Rank every sample within each (year, quarter) by its total earnings across
# Approved/Confirmed rows, ascending (rank 1 = lowest earnings).
# `sample_id` is a secondary sort key so that tied earnings always receive the
# same rank; ordering by `earnings` alone leaves ties arbitrary, which could
# change the selected sample whenever the plan is re-evaluated.
sample_rank = (
    spark.table(spiked_table_name)
    .filter(F.col("status").isin(["Approved", "Confirmed"]))
    .groupBy(
        "sample_id",
        F.quarter("interval_date").alias("quarter"),
        F.year("interval_date").alias("year"),
    )
    .agg(F.sum("earnings").alias("earnings"))
    .withColumn(
        "earnings_rank",
        F.row_number().over(
            W.partitionBy("year", "quarter").orderBy(
                F.col("earnings"), F.col("sample_id")
            )
        ),
    )
).cache()

# NOTE(review): despite the name, this is the full spiked-model table, not a
# P50 subset. The name is kept in case other cells reference it.
p50_df = spark.table(spiked_table_name)

# NOTE(review): ranks 54 and 7 are hard-coded and presumably correspond to the
# 90th / 10th percentile of the number of samples in the table — confirm
# against the actual sample count.
# NOTE(review): `start_date` here is a `date` starting 2025-04-01, whereas the
# other cells pass the notebook-wide string `start_date` — confirm this is
# intentional.
p90_earnings = clean_and_aggregate_earnings(
    df=p50_df,
    deal_info_df=sim_complete_deal_info,
    curve_name="20250320_spiked_p90",
    start_date=date(2025, 4, 1),
    end_date=end_date,
    sample_filter_df=sample_rank,
    rank=54,
)

p10_earnings = clean_and_aggregate_earnings(
    df=p50_df,
    deal_info_df=sim_complete_deal_info,
    curve_name="20250320_spiked_p10",
    start_date=date(2025, 4, 1),
    end_date=end_date,
    sample_filter_df=sample_rank,
    rank=7,
)

# Extends the running accumulator in place: re-running this cell on its own
# appends the P90/P10 rows again, so re-run from the hindcast cell to reset.
combined_earnings = combined_earnings.unionByName(p90_earnings)
combined_earnings = combined_earnings.unionByName(p10_earnings)

# Aurora scenarios

In [8]:
# Aggregate the Aurora scenario earnings over the reporting window.
# NOTE(review): the outlook cell maps curves named "Central" and "Messy",
# so `is_aurora=True` presumably makes the helper emit one curve per
# scenario rather than the `curve_name` passed here — confirm in
# `utils.transform`.
aurora_df = spark.table("exploration.default.earnings_aurora_scenarios")
aurora_earnings = clean_and_aggregate_earnings(
    df=aurora_df,
    deal_info_df=sim_complete_deal_info,
    curve_name="20250320_aurora",
    start_date=start_date,
    end_date=end_date,
    is_aurora=True
)

# Extends the running accumulator in place: re-running this cell on its own
# appends the Aurora rows again.
combined_earnings = combined_earnings.unionByName(aurora_earnings)

In [0]:
# Collect the combined Spark result to pandas, then convert it to a Polars
# DataFrame for the reporting step below.
combined_earnings_pd = combined_earnings.toPandas()
combined_earnings_df = pl.from_pandas(combined_earnings_pd)

# Outlook output

In [0]:
# Lookup from internal curve identifiers to the display names used in the
# report. The join below is an inner join, so any curve missing from this
# table is dropped from the output.
curve_names = pl.DataFrame(
    {
        "curve": [
            "2024_hindcast",
            "20250320_spiked_p90",
            "20250320_spiked_p10",
            "20250320_spiked",
            "20250320_50spiked",
            "20250320_150spiked",
            "Central",
            "Messy",
        ],
        "Curve Name": [
            "2024 Hindcast",
            "Demand Model (P90)",
            "Demand Model (P10)",
            "Demand Model (Mean)",
            "Demand Model (50% Cap)",
            "Demand Model (150% Cap)",
            "Aurora Central",
            "Aurora Messy",
        ],
    }
)

# Deal id that is always reported as "Hypothetical", whatever its status.
HYPOTHETICAL_DEAL_ID = 888_888
# Statuses reported as "Contracted"; everything else is "Hypothetical".
CONTRACTED_STATUSES = ["Approved", "Confirmed"]
# First calendar year included in the outlook.
OUTLOOK_FIRST_YEAR = 2025
OUTLOOK_CSV_PATH = "consolidated_earnings_outlook_v2.csv"


def _volume_weighted_rate(earnings_col: str) -> pl.Expr:
    """Return `earnings_col` per MWh, or null where `volume_mwh` is zero.

    Without the guard a zero volume yields inf/NaN, which would be written
    to the CSV as if it were a real rate. With no `otherwise` branch,
    unmatched rows evaluate to null.
    """
    return pl.when(pl.col("volume_mwh") != 0).then(
        pl.col(earnings_col) / pl.col("volume_mwh")
    )


is_buy = pl.col("buy_sell") == "Buy"

outlook_df = (
    combined_earnings_df.filter(
        pl.col("total_earnings").is_not_null()
        & (pl.col("year") >= OUTLOOK_FIRST_YEAR)
    )
    .join(curve_names, "curve")
    .with_columns(
        # Buy: income is the floating leg and cost the fixed leg (negated);
        # for any other side the two are swapped.
        pl.when(is_buy)
        .then(pl.col("total_income"))
        .otherwise(-pl.col("total_cost"))
        .alias("floating_earnings"),
        pl.when(is_buy)
        .then(-pl.col("total_cost"))
        .otherwise(pl.col("total_income"))
        .alias("fixed_earnings"),
        # Force the placeholder deal to "Hypothetical" before the
        # Contracted/Hypothetical mapping in the select below.
        pl.when(pl.col("deal_id") == HYPOTHETICAL_DEAL_ID)
        .then(pl.lit("Hypothetical"))
        .otherwise(pl.col("status"))
        .alias("status"),
    )
    .select(
        pl.col("Curve Name"),
        pl.col("year").alias("Year"),
        pl.col("quarter").alias("Quarter"),
        pl.col("deal_id").alias("Deal ID"),
        pl.col("deal_name").alias("Deal Name"),
        pl.col("product_name").alias("Product Name"),
        pl.col("instrument").alias("Instrument"),
        pl.when(pl.col("status").is_in(CONTRACTED_STATUSES))
        .then(pl.lit("Contracted"))
        .otherwise(pl.lit("Hypothetical"))
        .alias("Status"),
        pl.col("buy_sell").alias("Buy/Sell"),
        pl.col("strategy").alias("Strategy"),
        pl.col("regionid").alias("Region ID"),
        pl.col("group").alias("Group"),
        pl.col("volume_mwh").alias("Volume (MWh)"),
        pl.col("fixed_earnings").alias("Fixed Earnings"),
        _volume_weighted_rate("fixed_earnings").alias("VW Fixed Rate"),
        pl.col("floating_earnings").alias("Floating Earnings"),
        _volume_weighted_rate("floating_earnings").alias("VW Floating Rate"),
        pl.col("twp").alias("TW Floating Price"),
        pl.col("total_earnings").alias("Total Earnings"),
    )
)

# Write the consolidated outlook to the working directory.
outlook_df.write_csv(OUTLOOK_CSV_PATH)