In [None]:
#==========================================
# Title:  Climate Credit Risk Model
# Author: Vachan
# Email : vachan@iitb.ac.in
#==========================================

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import json

In [2]:
def calculate_pd(log_odds):
    """Convert log-odds into a probability with the logistic (sigmoid) function.

    Uses the identity ``1 / (1 + exp(-x)) == exp(-log(1 + exp(-x)))`` and
    evaluates the logarithm with ``np.logaddexp``. The naive form overflows
    ``exp`` for very negative log-odds (emitting a RuntimeWarning); this form
    never exponentiates a large positive number, so the result simply
    underflows to 0.0 instead.

    Args:
        log_odds: A scalar, NumPy array or pandas Series of log-odds.

    Returns:
        The corresponding probabilities in [0, 1], computed element-wise.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, -log_odds))


In [3]:
def load_pd_model(file_path):
    """Read the saved PD model description from a JSON file.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        The decoded JSON content, or None when the file does not exist or
        does not contain valid JSON (an explanatory message is printed in
        both failure cases).
    """
    print(f"--- Loading PD Model details from {file_path} ---")
    try:
        with open(file_path, 'r') as model_file:
            raw_text = model_file.read()
        return json.loads(raw_text)
    except FileNotFoundError:
        failure_lines = (
            f"Error: The model details file was not found at {file_path}",
            "Please ensure you have run the first script to generate this file.",
        )
    except json.JSONDecodeError:
        failure_lines = (
            f"Error: The file at {file_path} is not a valid JSON file.",
        )

    # Only reached on one of the handled failures: report it and signal "no model".
    for line in failure_lines:
        print(line)
    return None

In [4]:
def predict_future_pd(customer_data, predicted_risk_drivers, pd_model_details,
                      output_path='Final PD.csv', sample_size=10):
    """
    Calculates the Probability of Default (PD) for each customer for 2026
    under various NGFS scenarios.

    For every scenario, the customer columns named in the predictions are
    overwritten with that scenario's predicted value (one value per driver,
    applied to all customers). The log-odds are then computed as
    ``const + sum(coefficient * column)`` and converted to a PD with
    ``calculate_pd``. The combined results are written to CSV, a random sample
    and the per-scenario average are printed, and the combined table is returned.

    Args:
        customer_data (pd.DataFrame): The original dataframe with customer-level data.
            Must contain a 'customer_id' column. It is not modified.
        predicted_risk_drivers (pd.DataFrame): A dataframe with the predicted 2026 values
            for each risk driver under each scenario (columns 'Scenario',
            'risk_driver' and 'predicted_value_2026').
        pd_model_details (dict): A dictionary containing the variables and coefficients
            of the pre-trained logistic regression model (keys 'variables'
            and 'coefficients').
        output_path (str | None): Where to write the combined results as CSV.
            Defaults to the historical file name; pass None to skip writing.
        sample_size (int): Maximum number of rows shown in the printed sample.

    Returns:
        pd.DataFrame | None: One row per customer and scenario with the columns
        'customer_id', 'Scenario' and 'predicted_pd_2026', or None when the
        predictions contain no scenario.
    """
    print("--- Starting PD Calculation for 2026 ---")

    model_vars = pd_model_details['variables']
    model_coeffs = pd_model_details['coefficients']

    # The intercept is applied exactly once, as the starting value of the
    # log-odds. 'const' is therefore excluded from the per-variable terms so a
    # 'const' column in the customer data cannot add the intercept a second time.
    intercept = model_coeffs.get('const', 0)

    # Decide once which variables can contribute. The scenario updates below only
    # overwrite existing columns, so the available columns are the same for
    # every scenario.
    usable_vars = []
    skipped_vars = []
    for var in model_vars:
        if var == 'const':
            continue
        if var in model_coeffs and var in customer_data.columns:
            usable_vars.append(var)
        else:
            skipped_vars.append(var)
    if skipped_vars:
        # Dropping a term changes the PD, so make the omission visible.
        print("WARNING: model variables ignored in the log-odds "
              f"(no coefficient or no matching customer column): {skipped_vars}")

    all_scenario_pds = []

    for scenario in predicted_risk_drivers['Scenario'].unique():
        print(f"\n--- Calculating PD for Scenario: {scenario} ---")

        scenario_predictions = predicted_risk_drivers[predicted_risk_drivers['Scenario'] == scenario]

        # Work on a private copy so the caller's dataframe is left untouched.
        scenario_customer_df = customer_data.copy()

        # Overwrite each known risk driver with its predicted 2026 value; the
        # single scenario-level value is broadcast to every customer.
        for driver, predicted_value in zip(scenario_predictions['risk_driver'],
                                           scenario_predictions['predicted_value_2026']):
            if driver in scenario_customer_df.columns:
                print(f"  Updating '{driver}' with predicted value: {predicted_value:.4f}")
                scenario_customer_df[driver] = predicted_value

        # --- Calculate Log-Odds ---
        print("  Calculating log-odds using formula: const + (coeff1 * var1) + ...")
        log_odds = intercept
        for var in usable_vars:
            log_odds = log_odds + model_coeffs[var] * scenario_customer_df[var]

        # --- Calculate PD ---
        scenario_customer_df['predicted_pd_2026'] = calculate_pd(log_odds)
        scenario_customer_df['Scenario'] = scenario

        all_scenario_pds.append(scenario_customer_df[['customer_id', 'Scenario', 'predicted_pd_2026']])

    # --- Combine and Summarize Results ---
    if not all_scenario_pds:
        print("\nCould not generate PD predictions.")
        return None

    final_pd_df = pd.concat(all_scenario_pds, ignore_index=True)
    if output_path is not None:
        final_pd_df.to_csv(output_path)

    print("\n\n==================== FINAL 2026 PD PREDICTIONS ====================")
    print("\n--- Sample PDs for individual customers ---")
    # DataFrame.sample raises if asked for more rows than exist, so cap the size.
    print(final_pd_df.sample(min(sample_size, len(final_pd_df))))

    print("\n--- Average Predicted PD per Scenario ---")
    pd_summary = (
        final_pd_df.groupby('Scenario')['predicted_pd_2026']
        .mean()
        .reset_index()
        .rename(columns={'predicted_pd_2026': 'average_pd_2026'})
    )
    print(pd_summary)

    return final_pd_df



In [5]:
# --- Main Execution ---
def main():
    """Load the PD model, customer data and predicted risk drivers, then project 2026 PDs.

    Missing inputs end the run with an explanatory message and an early return.
    The interactive ``exit()`` helper is deliberately not used: inside a
    notebook it shuts the kernel down and discards every variable in the session.

    Returns:
        Whatever ``predict_future_pd`` returns, or None when an input could not
        be loaded or an error was reported.
    """
    # --- 1. Define File Paths ---
    # NOTE(review): these are absolute, machine-specific locations; adjust them
    # when running on another machine.
    pd_model_file = "C:/Users/vacha/OneDrive - Indian Institute of Technology Bombay/Climate Finance/pd_model_details.json"
    customer_data_file = "C:/Users/vacha/OneDrive - Indian Institute of Technology Bombay/Climate Finance/Retail Portfoilio Research/Auto Risk Kaggle/data.csv"
    predicted_drivers_file = "C:/Users/vacha/predicted_risk_drivers_2026.csv" # This file is created by the previous script.

    try:
        # --- 2. Load All Required Data ---
        pd_model = load_pd_model(pd_model_file)
        if pd_model is None:
            # load_pd_model has already printed the reason; nothing more to do.
            return None

        customer_df = pd.read_csv(customer_data_file)

        # Load the results from the previous script.
        try:
            predicted_drivers_df = pd.read_csv(predicted_drivers_file)
        except FileNotFoundError:
            print(f"CRITICAL ERROR: The file '{predicted_drivers_file}' was not found.")
            print("Please run the previous script (future_data_simulation_model.py) to generate it.")
            return None  # Stop here if the predictions file is missing.
        print(f"--- Successfully loaded predicted risk drivers from '{predicted_drivers_file}' ---")

        print("-" * 55)

        # --- 3. Calculate and Display Future PD ---
        return predict_future_pd(
            customer_data=customer_df,
            predicted_risk_drivers=predicted_drivers_df,
            pd_model_details=pd_model
        )

    except FileNotFoundError as e:
        print(f"\nError: A required file was not found. Details: {e}")
    except Exception as e:
        # Best-effort reporting kept from the original: the run ends with a
        # message rather than a traceback.
        print(f"An error occurred: {e}")
    return None


if __name__ == '__main__':
    main()



--- Loading PD Model details from C:/Users/vacha/OneDrive - Indian Institute of Technology Bombay/Climate Finance/pd_model_details.json ---
--- Successfully loaded predicted risk drivers from 'C:/Users/vacha/predicted_risk_drivers_2026.csv' ---
-------------------------------------------------------
--- Starting PD Calculation for 2026 ---

--- Calculating PD for Scenario: Baseline ---
  Updating 'ltv' with predicted value: 63.9864
  Updating 'average_age' with predicted value: 13.0878
  Updating 'credit_score' with predicted value: 306.3839
  Updating 'last_six_month_defaulted_no' with predicted value: 0.0154
  Updating 'credit_history' with predicted value: 18.2206
  Calculating log-odds using formula: const + (coeff1 * var1) + ...

--- Calculating PD for Scenario: DAPS_AFR_R ---
  Updating 'ltv' with predicted value: 66.5777
  Updating 'average_age' with predicted value: 12.0912
  Updating 'credit_score' with predicted value: 306.3407
  Updating 'last_six_month_defaulted_no' with pr