# Project Storm: Linking Predicted SCS Wind Likelihood to State-Level Power Outages

**Objective:** Analyze the relationship between the predicted monthly likelihood/frequency of Severe Convective Storms (SCS) wind events (aggregated to the state level) and observed state-level monthly power outage metrics.

**Methodology:**
1. Load and clean historical power outage data from `outages.xlsx`.
2. Aggregate outage data to meaningful monthly metrics per state.
3. Load the previously trained multi-state SCS prediction model and scaler.
4. Generate historical monthly *predicted expected hits* per county using the model.
5. Aggregate predicted hits to the **state-month** level.
6. Merge state-level outage metrics and state-level predicted storm likelihood.
7. Analyze correlations and potentially build a simple regression model (`State Outage ~ State Predicted Hits`).

**Data Requirements:**
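* `outages.xlsx`: State-level monthly outage data. Expected sheet: `250409 Outage Data Analysis vSh` (the loading cell reads the workbook's **first** sheet, so this sheet must come first in the file).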
* `county_monthly_climate_variables_target_states.parquet` (or `.csv`): Processed climate data for the 14 target states (needs to cover historical period 2015-2024).
* `scs_wind_target_AnomInd_lgbm_calibrated.joblib`: Trained multi-state SCS occurrence prediction model.
* `scs_wind_target_states_scaler.joblib`: Scaler fitted on training data for the SCS model.

In [1]:
# --- Cell 2: Setup & Configuration ---
import pandas as pd
import numpy as np
import joblib
import os
import logging
import matplotlib.pyplot as plt
import seaborn as sns # For plotting correlations
from sklearn.preprocessing import StandardScaler # For loading scaler
import lightgbm as lgb # For loading model object structure
from sklearn.calibration import CalibratedClassifierCV # For loading model object structure
import gc

# --- Configuration ---
# Central place for every path and parameter used by the later cells, plus the
# notebook-wide logger. Later cells read these names as globals.

# --- Input Paths ---
CLIMATE_FILE = "../output/processed_climate/county_monthly_climate_variables_target_states.parquet"
MODEL_PATH = "../output/models/scs_wind_target_AnomInd_lgbm_calibrated.joblib" # Path to calibrated binary classifier
SCALER_PATH = "../output/models/scs_wind_target_states_scaler.joblib" # Path to scaler for Anom+Index features
OUTAGE_FILE_PATH = "../data/outages/outages.xlsx"
OUTAGE_SHEET_NAME = '250409 Outage Data Analysis vSh' # Or 0 for first sheet
COUNTIES_DEF_FILE = "../data/counties/2024_counties.txt"

# --- Parameters ---
TARGET_EVENT_TYPE = 'scs_wind'
target_state_fips = [ # List of 14 states (2-digit FIPS, zero-padded strings)
    "12", "20", "29", "40", "48", "05", "22", "28",
    "01", "13", "45", "37", "47", "51"
]
TARGET_OUTAGE_METRICS = [ # Columns available in outages.xlsx
    'CustomerHoursOutTotal', 'MaxCustomersOutTotal', 'CustomersTrackedTotal'
]
# Map State Names (from outage file) to State FIPS
STATE_NAME_TO_FIPS = { # Add ALL states present in outage file that are also in target_state_fips
    'Florida': '12', 'Kansas': '20', 'Missouri': '29', 'Oklahoma': '40',
    'Texas': '48', 'Arkansas': '05', 'Louisiana': '22', 'Mississippi': '28',
    'Alabama': '01', 'Georgia': '13', 'South Carolina': '45',
    'North Carolina': '37', 'Tennessee': '47', 'Virginia': '51'
}

# Last year (inclusive) of historical data used when filtering climate features.
MAX_EVALUATION_YEAR = 2024 # Use the last year of your reliable historical EVENT data period

# --- Output Paths ---
ANALYSIS_OUTPUT_DIR = "../output/analysis"
os.makedirs(ANALYSIS_OUTPUT_DIR, exist_ok=True)

# --- Setup Logger ---
LOG_FILE = "../logs/08_outage_analysis_log.log"
# logging.FileHandler does not create missing parent directories, so make sure
# the log folder exists before attaching the handler.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logger = logging.getLogger("OutageAnalysisLogger")
logger.setLevel(logging.DEBUG)
# Attach handlers only once so re-running this cell does not duplicate output.
if not logger.handlers:
    # File handler: full DEBUG detail, file is overwritten on each fresh setup.
    fh = logging.FileHandler(LOG_FILE, mode='w')
    fh.setLevel(logging.DEBUG)
    fh_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    fh.setFormatter(fh_formatter)
    logger.addHandler(fh)
    # Stream handler: INFO and above, shown in the notebook output.
    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh_formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] %(name)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    sh.setFormatter(sh_formatter)
    logger.addHandler(sh)

logger.info(f"--- Outage Analysis Notebook Started (Event Type: {TARGET_EVENT_TYPE}) ---")
logger.info(f"Processing target states: {target_state_fips}")
logger.info(f"Using Outage File: {OUTAGE_FILE_PATH}")
logger.info(f"Max Evaluation Year set to: {MAX_EVALUATION_YEAR}") # Log the value

[2025-04-14 13:23:48] [INFO] OutageAnalysisLogger - --- Outage Analysis Notebook Started (Event Type: scs_wind) ---
[2025-04-14 13:23:48] [INFO] OutageAnalysisLogger - Processing target states: ['12', '20', '29', '40', '48', '05', '22', '28', '01', '13', '45', '37', '47', '51']
[2025-04-14 13:23:48] [INFO] OutageAnalysisLogger - Using Outage File: ../data/outages/outages.xlsx
[2025-04-14 13:23:48] [INFO] OutageAnalysisLogger - Max Evaluation Year set to: 2024


In [3]:
# --- Cell 3: Load and Clean Outage Data (State Level - Corrected Var Name) ---
# Reads the outage workbook, keeps records for the target states, builds a
# month-start `time` column, coerces the metric columns to numeric, drops
# incomplete rows and derives `peak_pct_out`.
# Result: `df_outages_state`, sorted by state_fips and time.
import pandas as pd
import numpy as np
import os
import logging # Ensure logger is available
import gc # For garbage collection

# Ensure target_state_fips and STATE_NAME_TO_FIPS exist from Cell #2
if 'target_state_fips' not in locals() or not target_state_fips:
    raise NameError("target_state_fips list not defined.")
if 'STATE_NAME_TO_FIPS' not in locals():
    raise NameError("STATE_NAME_TO_FIPS mapping not defined.")
if 'OUTAGE_FILE_PATH' not in locals():
    raise NameError("OUTAGE_FILE_PATH not defined.")
if 'TARGET_OUTAGE_METRICS' not in locals() or not TARGET_OUTAGE_METRICS:
    raise NameError("TARGET_OUTAGE_METRICS list not defined or empty in Cell #2.")

df_outages = pd.DataFrame() # Initialize
logger.info("--- Loading and Cleaning State-Level Outage Data from First Sheet ---")
try:
    # --- Load Data ---
    if not os.path.exists(OUTAGE_FILE_PATH):
        raise FileNotFoundError(f"Outage file not found: {OUTAGE_FILE_PATH}")

    logger.info(f"Loading outage data from first sheet of {OUTAGE_FILE_PATH}...")
    # NOTE(review): OUTAGE_SHEET_NAME from Cell #2 is not used; the first sheet
    # is read regardless of its name. Confirm that this is intended.
    df_outages_raw = pd.read_excel(OUTAGE_FILE_PATH, sheet_name=0, dtype={'CountyFIPS': str}) # Read first sheet
    logger.info(f"Loaded {len(df_outages_raw)} outage records initially.")
    logger.debug(f"Initial columns: {df_outages_raw.columns.tolist()}")

    # --- Initial Cleaning & Validation ---
    required_cols = ['STATE', 'Year', 'Month'] + TARGET_OUTAGE_METRICS
    missing_cols = [col for col in required_cols if col not in df_outages_raw.columns]
    if missing_cols:
        raise ValueError(f"Outage data missing required columns: {missing_cols}.")

    # 1. Map State Name to State FIPS
    logger.info("Mapping State names to State FIPS...")
    # Strip stray whitespace so names like "Texas " still match the mapping;
    # the original STATE column is left untouched for reporting below.
    state_names = df_outages_raw['STATE'].astype(str).str.strip()
    df_outages_raw['state_fips'] = state_names.map(STATE_NAME_TO_FIPS)
    mapped_count = df_outages_raw['state_fips'].notna().sum()
    unmapped_states = df_outages_raw[df_outages_raw['state_fips'].isna()]['STATE'].unique()
    logger.info(f"Mapped {mapped_count} records.")
    if len(unmapped_states) > 0:
        logger.warning(f"Could not map states: {unmapped_states}.")

    # 2. Filter for Target States using FIPS
    logger.info(f"Filtering outage data for target states FIPS: {target_state_fips}...")
    initial_rows = len(df_outages_raw)
    df_outages_state = df_outages_raw[df_outages_raw['state_fips'].isin(target_state_fips)].copy()
    logger.info(f"Filtered from {initial_rows} to {len(df_outages_state)} records.")
    if df_outages_state.empty:
        logger.warning("Outage DataFrame empty after state filter!")

    # 3. Create Monthly Timestamp (first day of each Year/Month)
    logger.info("Creating monthly timestamp...")
    df_outages_state['Year'] = pd.to_numeric(df_outages_state['Year'], errors='coerce')
    df_outages_state['Month'] = pd.to_numeric(df_outages_state['Month'], errors='coerce')
    # Rows whose Year/Month could not be parsed cannot be placed on the timeline.
    df_outages_state.dropna(subset=['Year', 'Month'], inplace=True)
    df_outages_state['Year'] = df_outages_state['Year'].astype(int)
    df_outages_state['Month'] = df_outages_state['Month'].astype(int)
    df_outages_state['time'] = pd.to_datetime(df_outages_state[['Year', 'Month']].assign(DAY=1))

    # 4. Convert Metrics to Numeric
    logger.info(f"Converting metrics {TARGET_OUTAGE_METRICS} to numeric...")
    for col in TARGET_OUTAGE_METRICS:
        df_outages_state[col] = pd.to_numeric(df_outages_state[col], errors='coerce')
    nan_counts = df_outages_state[TARGET_OUTAGE_METRICS].isnull().sum()
    if nan_counts.sum() > 0:
        logger.warning(f"NaN values found in metrics:\n{nan_counts[nan_counts > 0]}")
    # Rows missing any key value are dropped rather than imputed.
    logger.info("Dropping rows with NaN in key numeric/ID columns...")
    key_numeric_cols = ['state_fips', 'time', 'CustomersTrackedTotal', 'MaxCustomersOutTotal', 'CustomerHoursOutTotal'] # Adjust as needed
    initial_rows_dropna = len(df_outages_state)
    df_outages_state.dropna(subset=[col for col in key_numeric_cols if col in df_outages_state.columns], inplace=True)
    if len(df_outages_state) < initial_rows_dropna:
        logger.warning(f"Dropped {initial_rows_dropna - len(df_outages_state)} rows with NaNs in key metrics/IDs.")

    # 5. Calculate Percentage Metric: peak customers out as % of customers tracked
    logger.info("Calculating Peak Pct Out...")
    tracked = df_outages_state['CustomersTrackedTotal']
    max_out_col = 'MaxCustomersOutTotal'
    if max_out_col not in df_outages_state.columns:
        logger.warning(f"'{max_out_col}' not found.")
        df_outages_state['peak_pct_out'] = np.nan
    else:
        out = df_outages_state[max_out_col]
        # Undefined (NaN) when nothing was tracked; those become 0 just below.
        df_outages_state['peak_pct_out'] = np.where(tracked.notna() & (tracked > 0) & out.notna(), (out / tracked) * 100, np.nan)
    # Assign the result back instead of a chained `fillna(inplace=True)`, which
    # is deprecated and does not modify the frame under pandas Copy-on-Write.
    df_outages_state['peak_pct_out'] = df_outages_state['peak_pct_out'].fillna(0)

    # 6. Select and Sort Final Columns
    final_cols = ['state_fips', 'time', 'Year', 'Month'] + TARGET_OUTAGE_METRICS + ['peak_pct_out']
    df_outages_state = df_outages_state[[col for col in final_cols if col in df_outages_state.columns]].copy()
    df_outages_state.sort_values(by=['state_fips', 'time'], inplace=True)

    if df_outages_state.empty:
        logger.warning("State outage DataFrame is empty after cleaning!")
    else:
        logger.info("State-level outage data loading and cleaning complete.")
        print("\n--- Cleaned State-Level Monthly Outage Data Sample ---")
        display(df_outages_state.head())
        print("\n--- Cleaned Outage Data Info ---")
        df_outages_state.info()

except FileNotFoundError as fnf:
    logger.exception(f"Outage file error: {fnf}")
    raise
except Exception as e:
    logger.exception(f"Failed loading/cleaning outage data: {e}")
    raise

# Clean up: the raw frame is no longer needed once the cleaned copy exists.
if 'df_outages_raw' in locals():
    del df_outages_raw
    gc.collect()

[2025-04-14 13:25:35] [INFO] OutageAnalysisLogger - --- Loading and Cleaning State-Level Outage Data from First Sheet ---
[2025-04-14 13:25:35] [INFO] OutageAnalysisLogger - Loading outage data from first sheet of ../data/outages/outages.xlsx...


[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Loaded 5916 outage records initially.
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Mapping State names to State FIPS...
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Mapped 4872 records.
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Filtering outage data for target states FIPS: ['12', '20', '29', '40', '48', '05', '22', '28', '01', '13', '45', '37', '47', '51']...
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Filtered from 5916 to 4872 records.
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Creating monthly timestamp...
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Converting metrics ['CustomerHoursOutTotal', 'MaxCustomersOutTotal', 'CustomersTrackedTotal'] to numeric...
CustomerHoursOutTotal    3580
MaxCustomersOutTotal     3580
CustomersTrackedTotal    3580
dtype: int64
[2025-04-14 13:25:37] [INFO] OutageAnalysisLogger - Dropping rows with NaN in key numeric/ID columns...
[2025-04-14 13:25:37] [IN


--- Cleaned State-Level Monthly Outage Data Sample ---


Unnamed: 0,state_fips,time,Year,Month,CustomerHoursOutTotal,MaxCustomersOutTotal,CustomersTrackedTotal,peak_pct_out
3737,1,2017-06-01,2017,6,18725.11,6418.0,9509.0,67.493953
3738,1,2017-07-01,2017,7,41080.71,11289.0,18911.0,59.695415
3739,1,2017-08-01,2017,8,28912.73,13160.0,19068.0,69.016153
3740,1,2017-09-01,2017,9,681071.8,26006.0,93347.0,27.859492
3741,1,2017-10-01,2017,10,1339345.0,100907.0,818476.0,12.328645



--- Cleaned Outage Data Info ---
<class 'pandas.core.frame.DataFrame'>
Index: 1292 entries, 3737 to 5219
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   state_fips             1292 non-null   object        
 1   time                   1292 non-null   datetime64[ns]
 2   Year                   1292 non-null   int64         
 3   Month                  1292 non-null   int64         
 4   CustomerHoursOutTotal  1292 non-null   float64       
 5   MaxCustomersOutTotal   1292 non-null   float64       
 6   CustomersTrackedTotal  1292 non-null   float64       
 7   peak_pct_out           1292 non-null   float64       
dtypes: datetime64[ns](1), float64(4), int64(2), object(1)
memory usage: 90.8+ KB


In [4]:
# --- Cell 4: Prepare Final Monthly Outage Data (No Aggregation Needed) ---
# Narrows the cleaned outage frame from Cell #3 down to the key columns plus
# the outage metrics and stores the result in `df_outages_monthly_agg`.

logger.info("--- Preparing Final Monthly Outage Metrics ---")

if 'df_outages_state' in locals() and not df_outages_state.empty:
    try:
        # Keys first, then the metrics configured in Cell #2, then the derived
        # percentage column when Cell #3 produced it.
        cols_to_keep = ['state_fips', 'time', *TARGET_OUTAGE_METRICS]
        if 'peak_pct_out' in df_outages_state.columns:
            cols_to_keep.append('peak_pct_out')

        # Only columns that really exist can be selected; report the rest.
        cols_present = [name for name in cols_to_keep if name in df_outages_state.columns]
        missing_selection = list(set(cols_to_keep) - set(cols_present))
        if missing_selection:
            logger.warning(f"Columns requested in cols_to_keep missing from df_outages_state: {missing_selection}")

        logger.info(f"Selecting final metric columns: {cols_present}")
        df_outages_monthly_agg = df_outages_state.loc[:, cols_present].copy()

        if not df_outages_monthly_agg.empty:
            logger.info(f"Final monthly outage data prepared. Shape: {df_outages_monthly_agg.shape}")
            print("\n--- Final Monthly Outage Data Sample ---")
            display(df_outages_monthly_agg.head())
            # Report any gaps left in the metric (non-key) columns.
            final_metric_cols = [name for name in cols_present if name not in ('state_fips', 'time')]
            final_nan_counts = df_outages_monthly_agg[final_metric_cols].isna().sum()
            if final_nan_counts.any():
                logger.warning(f"NaN values present in final outage metrics:\n{final_nan_counts[final_nan_counts > 0]}")
        else:
            logger.warning("Monthly aggregated outage DataFrame is empty after column selection!")

    except KeyError as ke:
        logger.exception(f"Column not found error during final preparation: {ke}")
        df_outages_monthly_agg = pd.DataFrame() # Leave an empty frame behind on error
        raise ke
    except Exception as e:
        logger.exception(f"Failed during final outage data preparation: {e}")
        df_outages_monthly_agg = pd.DataFrame() # Leave an empty frame behind on error
        raise e
else:
    logger.error("Cleaned outage data (df_outages_state) not available or empty. Skipping.")
    # Define the name anyway so later cells do not hit a NameError.
    df_outages_monthly_agg = pd.DataFrame()

# Release the raw frame if an earlier cell left it behind.
if 'df_outages_raw' in locals():
    del df_outages_raw
    gc.collect()

[2025-04-14 13:25:42] [INFO] OutageAnalysisLogger - --- Preparing Final Monthly Outage Metrics ---
[2025-04-14 13:25:42] [INFO] OutageAnalysisLogger - Selecting final metric columns: ['state_fips', 'time', 'CustomerHoursOutTotal', 'MaxCustomersOutTotal', 'CustomersTrackedTotal', 'peak_pct_out']
[2025-04-14 13:25:42] [INFO] OutageAnalysisLogger - Final monthly outage data prepared. Shape: (1292, 6)



--- Final Monthly Outage Data Sample ---


Unnamed: 0,state_fips,time,CustomerHoursOutTotal,MaxCustomersOutTotal,CustomersTrackedTotal,peak_pct_out
3737,1,2017-06-01,18725.11,6418.0,9509.0,67.493953
3738,1,2017-07-01,41080.71,11289.0,18911.0,59.695415
3739,1,2017-08-01,28912.73,13160.0,19068.0,69.016153
3740,1,2017-09-01,681071.8,26006.0,93347.0,27.859492
3741,1,2017-10-01,1339345.0,100907.0,818476.0,12.328645


In [5]:
# --- Cell 5: Load Historical Climate Features ---
# Loads the county-month climate table, snaps timestamps to the first day of
# their month, and restricts the rows to 2015..MAX_EVALUATION_YEAR and to the
# target states. Result: `df_climate_hist` with added `year` and `state_fips`.

logger.info("--- Loading Historical Climate Features for Target States ---")

df_climate_hist = pd.DataFrame() # Initialize

try:
    # Check if climate file path is defined
    if 'CLIMATE_FILE' not in locals() or not CLIMATE_FILE:
        raise ValueError("CLIMATE_FILE path not defined in Cell #2.")
    if not os.path.exists(CLIMATE_FILE):
        raise FileNotFoundError(f"Climate feature file not found: {CLIMATE_FILE}")

    # Load the parquet/CSV containing county-monthly features
    logger.info(f"Loading historical climate features from: {CLIMATE_FILE}")
    if CLIMATE_FILE.endswith(".parquet"):
        df_climate_hist = pd.read_parquet(CLIMATE_FILE)
    else:
        df_climate_hist = pd.read_csv(CLIMATE_FILE, parse_dates=['time'])
    logger.info(f"Loaded climate data shape: {df_climate_hist.shape}")

    # Ensure required columns exist
    if 'county_geoid' not in df_climate_hist.columns or 'time' not in df_climate_hist.columns:
        raise ValueError("Climate data missing 'county_geoid' or 'time' columns.")

    # Snap every timestamp to the first day of its month (midnight). The outage
    # data uses month-start timestamps, so this keeps the two tables joinable
    # on `time` even if the climate file stores mid-month or month-end dates.
    df_climate_hist['time'] = (
        pd.to_datetime(df_climate_hist['time']).dt.to_period('M').dt.to_timestamp()
    )
    df_climate_hist['year'] = df_climate_hist['time'].dt.year

    # Keep only the historical window used for model features / outage matching.
    hist_start_year_model = 2015 # Start year of model features
    hist_end_year_model = MAX_EVALUATION_YEAR # Defined in Cell #2
    logger.info(f"Filtering climate features for relevant historical period: {hist_start_year_model}-{hist_end_year_model}")
    df_climate_hist = df_climate_hist[
        (df_climate_hist['year'] >= hist_start_year_model) &
        (df_climate_hist['year'] <= hist_end_year_model)
    ].copy()

    # Filter for target states (although file should already be filtered).
    # The state is the first two digits of the 5-digit county GEOID; coerce to
    # a zero-padded string first so a numeric GEOID (e.g. 1001) still yields "01".
    df_climate_hist['state_fips'] = (
        df_climate_hist['county_geoid'].astype(str).str.zfill(5).str[:2]
    )
    # .copy() so later cells can add columns without SettingWithCopyWarning.
    df_climate_hist = df_climate_hist[df_climate_hist['state_fips'].isin(target_state_fips)].copy()

    if df_climate_hist.empty:
        raise ValueError("No historical climate data found for the specified period and states.")

    logger.info(f"Historical climate features prepared. Shape: {df_climate_hist.shape}")
    display(df_climate_hist.head())

except Exception as e:
    logger.exception(f"Failed loading/preparing historical climate features: {e}")
    raise

[2025-04-14 13:25:45] [INFO] OutageAnalysisLogger - --- Loading Historical Climate Features for Target States ---
[2025-04-14 13:25:45] [INFO] OutageAnalysisLogger - Loading historical climate features from: ../output/processed_climate/county_monthly_climate_variables_target_states.parquet
[2025-04-14 13:25:45] [INFO] OutageAnalysisLogger - Loaded climate data shape: (362628, 10)
[2025-04-14 13:25:45] [INFO] OutageAnalysisLogger - Filtering climate features for relevant historical period: 2015-2024
[2025-04-14 13:25:45] [INFO] OutageAnalysisLogger - Historical climate features prepared. Shape: (172680, 12)


Unnamed: 0,county_geoid,time,Max Temp,Spec Humid,Sens Heat Flux,Precip,Sea Level Press,Soil Moisture,Wind Speed,Altitude,year,state_fips
0,1001,2015-01-01,23.411621,0.004888,28.097054,2.217419,1026.149536,31.223341,2.761991,143.030991,2015,1
1,1001,2015-02-01,23.52356,0.006459,30.694408,4.660017,1023.142578,32.409649,3.117097,143.030991,2015,1
2,1001,2015-03-01,28.190033,0.00717,58.857788,4.170286,1022.330811,31.807745,3.139431,143.030991,2015,1
3,1001,2015-04-01,31.445557,0.009836,61.469265,4.29793,1016.757202,30.847223,3.183161,143.030991,2015,1
4,1001,2015-05-01,34.129486,0.013535,49.009747,5.193621,1018.477417,30.793398,2.228818,143.030991,2015,1


In [None]:
# --- NEW Cell 5.5: Recreate Features on Historical Climate Data ---
# Rebuilds the model inputs on `df_climate_hist`: per-county monthly
# climatology and anomalies, yearly climate indices merged from `df_indices`,
# imputation, lagged anomalies and cyclical month features.
# Result: `df_hist_featured`. Also adds `month`, `*_clim` and `*_anom` columns
# to `df_climate_hist` itself.
# Requires `df_indices`, `TARGET_VARIABLES` and `LAG_MONTHS` to be defined by
# cells outside this section of the notebook.

logger.info("--- Recreating Features on Loaded Historical Climate Data ---")


def _fill_within_county(frame, cols):
    """Forward-fill then back-fill `cols`, never crossing a county boundary."""
    forward_filled = frame.groupby('county_geoid')[cols].ffill()
    # The forward-filled frame no longer carries the key column, so group by
    # the key Series from `frame` (aligned on the shared index).
    return forward_filled.groupby(frame['county_geoid']).bfill()


try:
    # Check inputs
    if 'df_climate_hist' not in locals() or df_climate_hist.empty:
        raise ValueError("df_climate_hist (from Cell 5) missing or empty.")
    if 'df_indices' not in locals() or df_indices.empty:
        # The indices are not reloaded here; the index-loading cell must run first.
        logger.warning("df_indices not found, attempting to re-run index loading...")
        raise ValueError("Climate indices (df_indices) missing. Ensure index loading cell ran.")
    if 'TARGET_VARIABLES' not in locals(): raise NameError("TARGET_VARIABLES missing.")
    if 'LAG_MONTHS' not in locals(): raise NameError("LAG_MONTHS missing.")

    # --- Identify Variable Types ---
    df_climate_hist['year'] = df_climate_hist['time'].dt.year # Ensure year/month exist
    df_climate_hist['month'] = df_climate_hist['time'].dt.month
    fixed_var_names = ['Altitude']
    known_non_climate_cols = ['county_geoid', 'time', 'year', 'month', 'state_fips'] # Include state_fips if added
    # Skip derived *_clim / *_anom columns so re-running the cell is safe.
    all_climate_cols = [col for col in df_climate_hist.columns if col not in known_non_climate_cols and not col.endswith('_clim') and not col.endswith('_anom')]
    time_varying_climate_vars = [col for col in all_climate_cols if col not in fixed_var_names]
    fixed_climate_vars = [col for col in all_climate_cols if col in fixed_var_names]
    logger.info(f"Identified Time-Varying Vars: {time_varying_climate_vars}")
    logger.info(f"Identified Fixed Vars: {fixed_climate_vars}")

    # --- Calculate Climatology & Anomalies ---
    # Climatology = mean per (county, calendar month) over the rows loaded here;
    # anomaly = value minus that mean.
    # NOTE(review): the baseline is whatever period df_climate_hist covers;
    # confirm it matches the baseline used when the model was trained.
    logger.info("Calculating monthly climatology and anomalies...")
    anomaly_cols = []
    climatology_cols = []
    for var in time_varying_climate_vars + fixed_climate_vars:
        clim_col = f'{var}_clim'
        anom_col = f'{var}_anom'
        climatology_cols.append(clim_col)
        df_climate_hist[clim_col] = df_climate_hist.groupby(['county_geoid', 'month'])[var].transform('mean')
        df_climate_hist[anom_col] = df_climate_hist[var] - df_climate_hist[clim_col]
        anomaly_cols.append(anom_col)
    logger.info("Anomaly calculation complete.")

    # --- Merge Climate Indices ---
    logger.info("Merging climate indices...")
    # Ensure year columns have same type for merging
    df_climate_hist['year'] = df_climate_hist['year'].astype(df_indices['year'].dtype)
    # validate='many_to_one' raises if df_indices has more than one row per
    # year, which would otherwise silently duplicate county-month rows.
    df_hist_featured = pd.merge(df_climate_hist, df_indices, on='year', how='left', validate='many_to_one')
    logger.debug(f"Shape after merging indices: {df_hist_featured.shape}")
    index_cols = [col for col in df_indices.columns if col != 'year']
    index_nan_counts = df_hist_featured[index_cols].isnull().sum()
    if index_nan_counts.sum() > 0: logger.warning(f"NaNs found in merged indices:\n{index_nan_counts[index_nan_counts > 0]}")

    # --- Impute Missing Values (especially for indices) ---
    impute_cols = anomaly_cols + index_cols + fixed_climate_vars
    impute_cols = [col for col in impute_cols if col in df_hist_featured.columns]
    logger.info(f"Imputing NaNs in features: {impute_cols} ...")
    # Sort so fills (and the lags further down) follow time order per county.
    df_hist_featured.sort_values(by=['county_geoid', 'time'], inplace=True)
    anom_impute_cols = [c for c in anomaly_cols if c in df_hist_featured.columns]
    index_impute_cols = [c for c in index_cols if c in df_hist_featured.columns]
    fixed_impute_cols = [c for c in fixed_climate_vars if c in df_hist_featured.columns]
    if anom_impute_cols:
        # Both fill directions stay inside a county so one county's values
        # never leak into its neighbour in the sorted frame.
        df_hist_featured[anom_impute_cols] = _fill_within_county(df_hist_featured, anom_impute_cols)
    if index_impute_cols:
        # Filled per county as well, so gaps take the nearest year's value in
        # time order rather than a value from the adjacent county's rows.
        df_hist_featured[index_impute_cols] = _fill_within_county(df_hist_featured, index_impute_cols)
    for fv in fixed_impute_cols:
        if df_hist_featured[fv].isnull().any():
            # Assign back; chained `fillna(inplace=True)` is deprecated and has
            # no effect under pandas Copy-on-Write.
            df_hist_featured[fv] = df_hist_featured[fv].fillna(df_hist_featured[fv].median())
    remaining_nans = df_hist_featured[impute_cols].isnull().sum()
    if remaining_nans.sum() > 0:
        logger.warning(f"NaNs remain after imputation:\n{remaining_nans[remaining_nans > 0]}")
        logger.warning("Dropping rows.")
        df_hist_featured.dropna(subset=impute_cols, inplace=True)

    # --- Create Lagged Features for ANOMALIES ---
    # shift(lag) moves values by rows within a county, so a lag of k equals k
    # months only if each county has one row per consecutive month.
    logger.info(f"Creating lagged features for months: {LAG_MONTHS}...")
    time_varying_anom_cols = [f'{var}_anom' for var in time_varying_climate_vars if f'{var}_anom' in df_hist_featured.columns]
    lagged = {}
    if time_varying_anom_cols:
        by_county = df_hist_featured.groupby('county_geoid')
        for lag in LAG_MONTHS:
            for anom_var in time_varying_anom_cols:
                lagged[f'{anom_var}_lag{lag}'] = by_county[anom_var].shift(lag)
        # Attach all lag columns at once instead of copying the frame per column.
        df_hist_featured = df_hist_featured.assign(**lagged)
    lag_cols = list(lagged)

    # --- Create Time Features ---
    # Sine/cosine encoding of the calendar month (period 12).
    df_hist_featured['month_sin'] = np.sin(2 * np.pi * df_hist_featured['month']/12)
    df_hist_featured['month_cos'] = np.cos(2 * np.pi * df_hist_featured['month']/12)

    # --- Drop rows with NaNs introduced by lagging ---
    logger.info(f"Dropping rows with lag NaNs.")
    lag_cols_exist = [col for col in lag_cols if col in df_hist_featured.columns]
    if lag_cols_exist: df_hist_featured.dropna(subset=lag_cols_exist, inplace=True)
    logger.info(f"Shape after feature engineering & lag drop: {df_hist_featured.shape}")
    if df_hist_featured.empty: raise ValueError("DataFrame empty after lag drop.")

    # --- Final Check for Feature Columns ---
    # feature_cols is produced by Cell #6, which sits AFTER this cell. On a
    # top-to-bottom run it is not defined yet, so the check is skipped with a
    # warning; the prediction cell repeats the same check before scaling.
    if 'feature_cols' not in locals() or not feature_cols:
        logger.warning("feature_cols not available yet (defined in Cell #6); skipping feature check here.")
    else:
        missing_features_final = [col for col in feature_cols if col not in df_hist_featured.columns]
        if missing_features_final:
            raise ValueError(f"Features expected by model are STILL missing after engineering: {missing_features_final}")

    logger.info("Feature engineering complete for historical climate data.")
    display(df_hist_featured.head())

except Exception as e:
    logger.exception(f"Failed during historical feature engineering: {e}")
    raise

In [6]:
# --- Cell 6: Load Scaler & Trained Storm Model ---
# Loads the fitted scaler and the storm model from the paths configured in
# Cell #2 and records the feature names the scaler was fitted on.
# Results: `scaler`, `model`, `feature_cols`.

logger.info("--- Loading Scaler and Trained Storm Prediction Model ---")

scaler, model = None, None
feature_cols = [] # Filled in below

try:
    # ---- Scaler ----
    logger.info(f"Loading scaler from: {SCALER_PATH}")
    if not os.path.exists(SCALER_PATH):
        raise FileNotFoundError(f"Scaler file not found: {SCALER_PATH}")
    scaler = joblib.load(SCALER_PATH)
    logger.info("Scaler loaded successfully.")

    # ---- Feature names ----
    if not hasattr(scaler, 'feature_names_in_'):
        # Fallback (less reliable): guess the features from the climate frame
        # by removing key columns and *_clim columns.
        if 'df_climate_hist' not in locals() or df_climate_hist.empty:
            raise ValueError("Cannot determine feature columns: Scaler lacks names and climate data not loaded.")
        cols_to_exclude = ['county_geoid', 'time', 'year', 'month', 'state_fips']
        cols_to_exclude.extend(name for name in df_climate_hist.columns if name.endswith('_clim'))
        feature_cols = [name for name in df_climate_hist.columns if name not in cols_to_exclude]
        logger.warning(f"Scaler missing feature names. Inferred {len(feature_cols)} features from climate data columns. Ensure this is correct!")
    else:
        # Preferred: the names recorded on the scaler itself.
        feature_cols = list(scaler.feature_names_in_)
        logger.info(f"Retrieved {len(feature_cols)} feature names from scaler.")

    # ---- Model ----
    logger.info(f"Loading model from: {MODEL_PATH}")
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
    model = joblib.load(MODEL_PATH) # Calibrated LGBM model per the Cell #2 path comment
    logger.info("Storm prediction model loaded successfully.")
    logger.debug(f"Model type: {type(model)}")

except (FileNotFoundError, ValueError) as known_err:
    # Missing files / undeterminable features: log the message as-is.
    logger.exception(f"{known_err}")
    raise known_err
except Exception as e:
    logger.exception(f"Failed loading model/scaler: {e}")
    raise e

# Show the first few feature names at DEBUG level
if feature_cols:
    logger.debug(f"Model expects features like: {feature_cols[:5]}...")

[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - --- Loading Scaler and Trained Storm Prediction Model ---
[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - Loading scaler from: ../output/models/scs_wind_target_states_scaler.joblib
[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - Scaler loaded successfully.
[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - Retrieved 35 feature names from scaler.
[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - Loading model from: ../output/models/scs_wind_target_AnomInd_lgbm_calibrated.joblib
[2025-04-14 13:25:47] [INFO] OutageAnalysisLogger - Storm prediction model loaded successfully.


In [10]:
# --- Cell 7: Generate Historical Storm Predictions (using df_hist_featured) ---
# Scales the engineered historical features with the fitted scaler, runs the storm
# model's predict_proba, and stores the second probability column (index 1) next to
# the county/time/state identifiers in df_hist_pred.
# Inputs expected from earlier cells: df_hist_featured, scaler, model, feature_cols.
# Any failure is logged with its traceback and re-raised, so the notebook stops here.
import warnings  # Local import: this cell must not depend on another cell having imported it

logger.info("--- Generating Historical Storm Likelihood Predictions ---")
df_hist_pred = pd.DataFrame() # Initialize (stays empty if anything below fails)

try:
    # --- Check inputs ---
    # *** Use the DataFrame with engineered features from Cell 5.5 ***
    if 'df_hist_featured' not in locals() or df_hist_featured.empty:
        raise ValueError("Engineered historical features (df_hist_featured from Cell 5.5) missing or empty.")
    if 'scaler' not in locals() or scaler is None:
        raise NameError("Scaler not loaded (Cell 6).")
    if 'model' not in locals() or model is None:
        raise NameError("Model not loaded (Cell 6).")
    # len() instead of truthiness: `not feature_cols` raises on a NumPy array of names
    if 'feature_cols' not in locals() or feature_cols is None or len(feature_cols) == 0:
        raise ValueError("feature_cols list missing (Cell 6).")

    # Validate identifiers up front so a missing column is reported before the
    # (comparatively expensive) scaling and prediction steps run.
    identifier_cols = ['county_geoid', 'time', 'state_fips']
    if not all(col in df_hist_featured.columns for col in identifier_cols):
        raise ValueError(f"df_hist_featured missing identifier columns: Need {identifier_cols}")
    # --- End Checks ---

    # --- Prepare historical features (Select feature columns from df_hist_featured) ---
    logger.info("Selecting final feature columns from engineered historical data...")
    # Use the order the scaler was fitted with when it recorded one; otherwise fall
    # back to feature_cols.
    if hasattr(scaler, 'feature_names_in_'):
        feature_names_ordered = list(scaler.feature_names_in_)
    else:
        feature_names_ordered = list(feature_cols)

    # Check the list that is actually used for selection (not just feature_cols),
    # so a mismatch surfaces as a readable error rather than a KeyError.
    missing_features = [col for col in feature_names_ordered if col not in df_hist_featured.columns]
    if missing_features:
        raise ValueError(f"Engineered historical data missing required features: {missing_features}")

    X_hist = df_hist_featured[feature_names_ordered].copy() # Select from df_hist_featured

    # --- Scale features ---
    logger.info(f"Scaling historical features (shape: {X_hist.shape})...")
    # Check for NaNs BEFORE scaling (should have been handled in Cell 5.5, but check again)
    nan_check = X_hist.isnull().sum()
    if nan_check.sum() > 0:
        logger.error(f"Unexpected NaNs found in features BEFORE scaling:\n{nan_check[nan_check > 0]}")
        raise ValueError("NaNs found in features before scaling, imputation in Cell 5.5 failed.")

    X_hist_scaled = scaler.transform(X_hist)
    logger.info("Historical features scaled.")

    # --- Predict probabilities ---
    logger.info("Predicting historical probabilities...")
    with warnings.catch_warnings():
        # Silence all warnings raised during prediction only; the filter is
        # restored when the context manager exits.
        warnings.filterwarnings("ignore")
        hist_pred_proba = model.predict_proba(X_hist_scaled)[:, 1]

    # --- Combine predictions with identifiers ---
    # Identifiers and X_hist are both column slices of df_hist_featured, so they
    # share row order. The probabilities are attached by position; only the row
    # count has to agree.
    if len(hist_pred_proba) != len(df_hist_featured):
        raise ValueError("Length mismatch after processing, cannot assign probabilities.")

    hist_pred_frame = df_hist_featured[identifier_cols].copy()
    hist_pred_frame['predicted_prob'] = hist_pred_proba
    # Publish only once the frame is complete, so a failure above never leaves a
    # half-built df_hist_pred behind.
    df_hist_pred = hist_pred_frame
    logger.info("Historical probabilities generated.")
    display(df_hist_pred.head())

except Exception as e:
    logger.exception(f"Failed generating historical predictions: {e}")
    raise

[2025-04-14 13:38:23] [INFO] OutageAnalysisLogger - --- Generating Historical Storm Likelihood Predictions ---
[2025-04-14 13:38:23] [ERROR] OutageAnalysisLogger - Failed generating historical predictions: Engineered historical features (df_hist_featured from Cell 5.5) missing or empty.
Traceback (most recent call last):
  File "C:\Users\60864\AppData\Local\Temp\ipykernel_36664\2536494032.py", line 10, in <module>
    raise ValueError("Engineered historical features (df_hist_featured from Cell 5.5) missing or empty.")
ValueError: Engineered historical features (df_hist_featured from Cell 5.5) missing or empty.


ValueError: Engineered historical features (df_hist_featured from Cell 5.5) missing or empty.

In [None]:
# --- Cell 8: Aggregate Historical Predictions to State-Month ---
# Sums the per-row predicted probabilities in df_hist_pred within each
# (state_fips, month-start) group and stores the result in
# df_hist_state_monthly_expected with the summed column renamed to
# 'pred_expected_hits_hist'. If df_hist_pred is missing or empty, an error is
# logged and the output stays an empty DataFrame.

logger.info("--- Aggregating Historical Predictions to State-Month ---")
df_hist_state_monthly_expected = pd.DataFrame() # Initialize

if 'df_hist_pred' not in locals() or df_hist_pred.empty:
    logger.error("Historical predictions (df_hist_pred) missing. Cannot aggregate.")
else:
    try:
        logger.info("Summing county probabilities to get state-level expected hits...")
        # pd.Grouper(freq=...) only works on a datetime-like key. Coerce 'time'
        # on a derived frame so df_hist_pred itself is left untouched; this is a
        # no-op when the column is already datetime.
        hist_pred_dt = df_hist_pred.assign(time=pd.to_datetime(df_hist_pred['time']))

        # Group by state and month-start time, sum probabilities, then rename
        # the summed column for clarity.
        df_hist_state_monthly_expected = (
            hist_pred_dt
            .groupby(['state_fips', pd.Grouper(key='time', freq='MS')])['predicted_prob']
            .sum()
            .reset_index()
            .rename(columns={'predicted_prob': 'pred_expected_hits_hist'})
        )

        logger.info(f"Aggregation complete. Shape: {df_hist_state_monthly_expected.shape}")
        print("\n--- Sample State-Month Historical Expected Hits ---")
        display(df_hist_state_monthly_expected.head())

    except Exception as e:
        logger.exception(f"Failed aggregating historical predictions: {e}")
        raise

In [None]:
# --- Cell 9: Merge Outage Metrics & Storm Predictions ---
# Inner-joins the monthly outage aggregates with the state-month predicted hits
# on ('state_fips', 'time') and reports NaNs in the key analysis columns.
# The result is stored in df_merged_outage_pred; failures are logged and re-raised.

logger.info("--- Merging Monthly Outage Data with Monthly Predicted Hits ---")
df_merged_outage_pred = pd.DataFrame() # Initialize

try:
    # Both inputs must exist in the notebook namespace and contain rows
    outages_available = 'df_outages_monthly_agg' in locals() and not df_outages_monthly_agg.empty
    if not outages_available:
        raise ValueError("Aggregated monthly outage data (df_outages_monthly_agg) not available.")
    predictions_available = (
        'df_hist_state_monthly_expected' in locals()
        and not df_hist_state_monthly_expected.empty
    )
    if not predictions_available:
        raise ValueError("Aggregated historical predicted hits (df_hist_state_monthly_expected) not available.")

    # The join key 'time' is coerced to datetime in place on both frames
    for _join_frame in (df_outages_monthly_agg, df_hist_state_monthly_expected):
        _join_frame['time'] = pd.to_datetime(_join_frame['time'])

    logger.info("Performing merge on 'state_fips' and 'time'...")
    logger.debug(f"Outage data shape: {df_outages_monthly_agg.shape}, Time range: {df_outages_monthly_agg['time'].min()} - {df_outages_monthly_agg['time'].max()}")
    logger.debug(f"Prediction data shape: {df_hist_state_monthly_expected.shape}, Time range: {df_hist_state_monthly_expected['time'].min()} - {df_hist_state_monthly_expected['time'].max()}")

    # Inner join: keep only the state-months that appear in both datasets
    df_merged_outage_pred = df_outages_monthly_agg.merge(
        df_hist_state_monthly_expected,
        how='inner',
        on=['state_fips', 'time'],
    )

    if not df_merged_outage_pred.empty:
        logger.info(f"Merge successful. Final shape for analysis: {df_merged_outage_pred.shape}")
        logger.info(f"Time range of merged data: {df_merged_outage_pred['time'].min()} - {df_merged_outage_pred['time'].max()}")
        print("\n--- Sample Merged Outage and Prediction Data ---")
        display(df_merged_outage_pred.head())

        # Count NaNs per key column, looking only at columns present after the merge
        key_cols_check = TARGET_OUTAGE_METRICS + ['peak_pct_out', 'pred_expected_hits_hist']
        key_cols_present = [col for col in key_cols_check if col in df_merged_outage_pred.columns]
        nan_check_merged = df_merged_outage_pred[key_cols_present].isna().sum()
        if (nan_check_merged > 0).any():
            logger.warning(f"NaNs present in merged data columns:\n{nan_check_merged[nan_check_merged > 0]}")
            # Rows with NaNs are kept here on purpose. Optionally drop them for
            # modeling with:
            #   df_merged_outage_pred.dropna(subset=[<metric>, 'pred_expected_hits_hist'], inplace=True)
    else:
        logger.warning("Merge resulted in empty DataFrame. Check time ranges and state FIPS codes in both datasets.")

except Exception as e:
    logger.exception(f"Failed merging outage and prediction data: {e}")
    raise e

In [None]:
# --- Cell 10: Analyze Relationship & Build Linking Model ---
# Correlates one outage metric with the state-month predicted hits, draws a
# scatter plot, and fits a single-predictor OLS "linking" model
# (Outage_Metric ~ Pred_Expected_Hits). The fitted result is left in `ols_model`
# for the projection cell; it is None when no model could be fitted.
import statsmodels.formula.api as smf
import seaborn as sns # For the scatter plot below

logger.info("--- Analyzing Relationship between Predicted Storms and Outages ---")

# Reset on every run: without this, a skipped or failed fit would leave a model
# from an earlier execution of this cell in the namespace, and the projection
# cell's `ols_model is None` guard would silently use the stale model.
ols_model = None

# Choose the target outage metric to analyze/model
target_metric = 'peak_pct_out' # Example: Predict peak percentage of customers out
# Or: target_metric = 'CustomerHoursOutTotal'
# Or: target_metric = 'MaxCustomersOutTotal'

# Ensure chosen metric and predictor exist and have no NaNs for correlation/modeling
analysis_cols = [target_metric, 'pred_expected_hits_hist']
if not all(col in df_merged_outage_pred.columns for col in analysis_cols):
    logger.error(f"Missing required columns for analysis: Need {analysis_cols}")
    # Nothing further runs in this cell; ols_model stays None
else:
    df_analysis = df_merged_outage_pred[analysis_cols].dropna()
    logger.info(f"Using {len(df_analysis)} complete records for analysis.")

    if len(df_analysis) > 1: # Need > 1 record for correlation/regression
        # --- Correlation Analysis ---
        logger.info("Calculating correlation...")
        correlation = df_analysis[target_metric].corr(df_analysis['pred_expected_hits_hist'])
        logger.info(f"Correlation between {target_metric} and Predicted Expected Hits: {correlation:.4f}")
        print(f"\nCorrelation ({target_metric} vs Pred. Hits): {correlation:.4f}")

        # Scatter Plot
        plt.figure(figsize=(8, 6))
        sns.scatterplot(data=df_analysis, x='pred_expected_hits_hist', y=target_metric, alpha=0.5)
        plt.title(f'{target_metric} vs. Predicted Expected SCS Hits per State-Month')
        plt.xlabel("Predicted Expected SCS Hits (Sum of County Probs)")
        plt.ylabel(f"State-Month {target_metric}")
        plt.grid(True)
        plt.tight_layout()
        plt.show()

        # --- Simple Linking Model Example (OLS Regression) ---
        # Predicting outage metric based on predicted storm likelihood.
        # Columns are renamed to fixed identifiers so the formula stays valid
        # whatever target_metric is called, and so the projection cell can feed
        # a column named 'Pred_Expected_Hits'.
        df_analysis = df_analysis.rename(columns={
            target_metric: 'Outage_Metric',
            'pred_expected_hits_hist': 'Pred_Expected_Hits'
        })
        formula = "Outage_Metric ~ Pred_Expected_Hits"
        try:
            logger.info(f"Fitting OLS model: {formula}")
            ols_model = smf.ols(formula, data=df_analysis).fit()
            logger.info("OLS linking model fitted.")
            print("\n--- Simple Linking Model Summary (OLS) ---")
            print(ols_model.summary())

            # Could save this model: ols_model.save("path/to/linking_model.pickle")

            # Add predictions from linking model for comparison
            df_analysis['Linked_Pred_Outage'] = ols_model.predict(df_analysis[['Pred_Expected_Hits']])

            # Actual vs. fitted values, with the y = x line as the ideal-fit reference
            plt.figure(figsize=(8, 6))
            plt.scatter(df_analysis['Outage_Metric'], df_analysis['Linked_Pred_Outage'], alpha=0.3, s=10)
            plot_min = min(0, df_analysis['Outage_Metric'].min(), df_analysis['Linked_Pred_Outage'].min())
            plot_max = max(df_analysis['Outage_Metric'].max(), df_analysis['Linked_Pred_Outage'].max()) * 1.05
            plt.plot([plot_min, plot_max], [plot_min, plot_max], 'r--', label='Ideal Fit')
            plt.xlabel(f"Actual {target_metric}")
            plt.ylabel(f"Predicted {target_metric} (from Storm Hits)")
            plt.title('Linking Model: Actual vs. Predicted Outage Metric')
            plt.grid(True)
            plt.legend()
            plt.tight_layout()
            plt.show()

        except Exception as model_e:
            # Deliberately not re-raised: a failed fit/plot should not stop the
            # notebook. If the fit itself failed, ols_model is still None.
            logger.exception(f"Failed to fit/evaluate linking model: {model_e}")

    else:
        logger.warning("Not enough data points for correlation/regression analysis after dropping NaNs.")

In [None]:
# --- Cell 11: Future Outage Projections (Optional) ---
# Re-scores the future feature matrix (X_future) with the storm model, sums the
# predicted probabilities per (state_fips, month-start), feeds those sums to the
# OLS linking model, then displays, saves (CSV) and plots the projected metric.
# Skipped with a warning when no linking model is available. Errors inside the
# projection are logged with their traceback but NOT re-raised.
import warnings  # Local import: this cell must not depend on another cell having imported it

logger.info("--- Generating Future Outage Projections using Linking Model ---")

# Check if linking model was created
if 'ols_model' not in locals() or ols_model is None:
    logger.warning("Linking model (ols_model) not found or failed to train. Skipping future outage projection.")
else:
    try:
        # --- Load Future Storm Projections ---
        # Path defined in Cell #2 - points to ANNUAL county projections
        # Need MONTHLY state projections - recalculate here from X_future
        logger.info("Generating MONTHLY state-level future storm predictions...")

        # Check required inputs
        if 'X_future' not in locals() or X_future.empty:
            raise ValueError("X_future missing.")
        if 'future_identifiers' not in locals() or future_identifiers.empty:
            raise ValueError("future_identifiers missing.")
        if 'scaler' not in locals() or scaler is None:
            raise NameError("Scaler missing.")
        if 'model' not in locals() or model is None:
            raise NameError("Storm model missing.")
        # len() instead of truthiness: `not feature_cols` raises on a NumPy array of names
        if 'feature_cols' not in locals() or feature_cols is None or len(feature_cols) == 0:
            raise ValueError("feature_cols missing.")

        # Prepare/Scale future features
        logger.debug("Preparing/Scaling future climate features...")
        # Same rule as the historical prediction cell: prefer the column order
        # recorded by the scaler, falling back to feature_cols.
        if hasattr(scaler, 'feature_names_in_'):
            feature_names_ordered = list(scaler.feature_names_in_)
        else:
            feature_names_ordered = list(feature_cols)
        missing_features = [col for col in feature_names_ordered if col not in X_future.columns]
        if missing_features:
            raise ValueError(f"X_future missing required features: {missing_features}")

        X_future_prep = X_future[feature_names_ordered].copy()
        # Surface NaNs without aborting: only a warning, so inputs that were
        # scored successfully before this check existed still are.
        future_nan_counts = X_future_prep.isnull().sum()
        if future_nan_counts.sum() > 0:
            logger.warning(f"NaNs present in future features before scaling:\n{future_nan_counts[future_nan_counts > 0]}")
        X_future_scaled = scaler.transform(X_future_prep)

        # Predict future probabilities
        logger.debug("Predicting future probabilities...")
        with warnings.catch_warnings():
            # Only UserWarning is suppressed, and only for the prediction call
            warnings.filterwarnings("ignore", category=UserWarning)
            future_probs = model.predict_proba(X_future_scaled)[:, 1]

        # Combine with identifiers. Probabilities are attached by position, so the
        # identifier frame is re-indexed 0..n-1 and must have the same row count.
        # NOTE(review): this assumes future_identifiers rows are in the same order
        # as X_future rows - confirm where both are built.
        df_future_pred_monthly = future_identifiers.reset_index(drop=True)
        if len(df_future_pred_monthly) != len(future_probs):
            raise ValueError(
                f"Row count mismatch: {len(df_future_pred_monthly)} identifier rows vs "
                f"{len(future_probs)} predictions."
            )
        df_future_pred_monthly['predicted_prob'] = future_probs
        df_future_pred_monthly['time'] = pd.to_datetime(df_future_pred_monthly['time'])
        # Add state fips (first two characters of the county GEOID string)
        df_future_pred_monthly['state_fips'] = df_future_pred_monthly['county_geoid'].str[:2]
        # Ensure only target states
        df_future_pred_monthly = df_future_pred_monthly[df_future_pred_monthly['state_fips'].isin(target_state_fips)]

        # Aggregate future probabilities to state-month
        logger.debug("Aggregating future probabilities to state-month...")
        df_future_state_monthly_expected = (
            df_future_pred_monthly
            .groupby(['state_fips', pd.Grouper(key='time', freq='MS')])['predicted_prob']
            .sum()
            .reset_index()
            # Use same name as linking model input
            .rename(columns={'predicted_prob': 'Pred_Expected_Hits'})
        )

        if df_future_state_monthly_expected.empty:
            raise ValueError("Future state-level storm predictions are empty.")

        # --- Apply Linking Model ---
        logger.info("Applying linking model to future storm predictions...")
        # Use the linking model (ols_model) trained in Cell #10
        # Ensure input columns match (needs 'Pred_Expected_Hits')
        future_outage_predictions = ols_model.predict(df_future_state_monthly_expected[['Pred_Expected_Hits']])
        # The linear model can yield negative values; they are currently left
        # as-is. To clip count/hour metrics at zero, apply
        # np.maximum(0, future_outage_predictions) here.

        df_future_outage_proj = df_future_state_monthly_expected[['state_fips', 'time']].copy()
        df_future_outage_proj[f'Projected_{target_metric}'] = future_outage_predictions

        logger.info("Future outage projections generated.")
        print(f"\n--- Sample Future Projected Outages ({target_metric}) ---")
        display(df_future_outage_proj.head())

        # --- Save Future Outage Projections ---
        OUTAGE_PROJ_PATH = os.path.join(ANALYSIS_OUTPUT_DIR, f"projected_{target_metric}_by_state_month.csv")
        try:
            # Create the output directory if needed so to_csv does not fail on a
            # fresh checkout; skipped for an empty path (current directory).
            if ANALYSIS_OUTPUT_DIR:
                os.makedirs(ANALYSIS_OUTPUT_DIR, exist_ok=True)
            df_future_outage_proj.to_csv(OUTAGE_PROJ_PATH, index=False)
            logger.info(f"Saved future outage projections to {OUTAGE_PROJ_PATH}")
        except Exception as save_e:
            # A failed save is logged only; plotting below still runs
            logger.exception(f"Failed to save outage projections: {save_e}")

        # --- Plot Future Outage Projections (Example: Time Series for a State) ---
        state_to_plot = target_state_fips[0] # Plot first state
        plot_df_future = df_future_outage_proj[df_future_outage_proj['state_fips'] == state_to_plot]
        if not plot_df_future.empty:
            plt.figure(figsize=(15, 6))
            plt.plot(plot_df_future['time'], plot_df_future[f'Projected_{target_metric}'], marker='.', linestyle='-')
            plt.title(f"Projected Future {target_metric} for State {state_to_plot}")
            plt.xlabel("Time")
            plt.ylabel(f"Projected {target_metric}")
            plt.grid(True)
            plt.tight_layout()
            plt.show()

    # This step is optional, so every failure is logged (with traceback) and
    # swallowed rather than stopping the notebook.
    except FileNotFoundError as fnf:
        logger.exception(f"{fnf}")
    except ValueError as ve:
        logger.exception(f"ValueError: {ve}")
    except NameError as ne:
        logger.exception(f"Missing variable: {ne}")
    except Exception as e:
        logger.exception(f"Failed future outage projection: {e}")

logger.info("--- Outage Analysis Notebook Complete ---")