In [None]:
import os
import sys
from pathlib import Path

# Navigate to project root (equivalent to cd ..)
# The root is resolved only once per kernel session and stored as an absolute
# path. Without this guard, re-running the cell in a notebook (no __file__)
# would evaluate Path.cwd().parent *after* the chdir below and climb one
# directory higher on every execution.
if 'project_dir' not in globals():
    project_dir = (
        Path(__file__).parent.parent if '__file__' in globals() else Path.cwd().parent
    ).resolve()
os.chdir(project_dir)

# Add src directory to Python path for imports (inserted at most once)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import pandas as pd
from datetime import datetime

# Import your existing, trusted function from your project structure
from account_data_adhoc.core import query_df_on_date, get_last_business_day

print("Starting indirect loan report generation...")

# Define the overall date range for the report
start_date = datetime(2020, 1, 1)
end_date = datetime(2024, 12, 31)

# Generate a list of all month-end dates to use for our snapshots
snapshot_dates = pd.date_range(start=start_date, end=end_date, freq='ME')

print(f"Generated {len(snapshot_dates)} monthly snapshot dates to process.")

In [None]:
# Display the generated month-end snapshot dates as this cell's output.
snapshot_dates

In [None]:
print("Collecting loan originations month by month...")

# One dataframe per month that had at least one origination.
monthly_originations_dfs = []

for month_end_date in snapshot_dates:
    month_label = month_end_date.strftime('%Y-%m')
    print(f"--- Processing snapshot for {month_label} ---")

    # get_last_business_day is parsed here as a '%Y-%m-%d %H:%M:%S' string;
    # the parsed datetime is what the snapshot query receives.
    business_day_text = get_last_business_day(month_end_date)
    snapshot_business_date = datetime.strptime(business_day_text, '%Y-%m-%d %H:%M:%S')

    # Pull the snapshot dataframe for that date via the project helper.
    df_snapshot = query_df_on_date(specified_date=snapshot_business_date)
    if df_snapshot.empty:
        print("Snapshot returned empty dataframe, skipping.")
        continue

    # Normalise contractdate to datetime so the .dt accessors below work.
    df_snapshot['contractdate'] = pd.to_datetime(df_snapshot['contractdate'])
    contract_dates = df_snapshot['contractdate']

    # Keep only loans whose contract date falls in this snapshot's calendar month.
    same_year = contract_dates.dt.year == month_end_date.year
    same_month = contract_dates.dt.month == month_end_date.month
    df_month_originations = df_snapshot[same_year & same_month].copy()

    if df_month_originations.empty:
        continue

    print(f"Found {len(df_month_originations)} loans originated in {month_end_date.strftime('%Y-%m')}.")
    monthly_originations_dfs.append(df_month_originations)

print("\nFinished collecting all monthly data.")

In [None]:
# Fail fast when nothing was collected. Previously this branch only printed
# "Exiting." and carried on, so the next cell crashed with a NameError because
# indirect_loans_df was never defined.
if not monthly_originations_dfs:
    raise RuntimeError("No loan originations found in the entire date range.")

# Combine all monthly dataframes into a single master list
all_originations_df = pd.concat(monthly_originations_dfs, ignore_index=True)

# Safeguard: Drop duplicates based on the unique account number
# (the first occurrence, i.e. the earliest snapshot collected, is kept).
unique_originations_df = all_originations_df.drop_duplicates(subset=['acctnbr'], keep='first')
# The year range in the message follows the configured reporting window.
print(
    f"\nTotal unique loans originated ({start_date.year}-{end_date.year}): "
    f"{len(unique_originations_df)}"
)

# Apply the required business rule filter: a loan counts as indirect when its
# Category is 'Indirect' OR its account type code is CM15/CM16.
indirect_loans_df = unique_originations_df[
    (unique_originations_df['Category'] == 'Indirect') |
    (unique_originations_df['currmiaccttypcd'].isin(['CM15', 'CM16']))
].copy()

print(f"Found {len(indirect_loans_df)} unique 'Indirect' loans after filtering.")

In [None]:
# Report layout: keys are source columns, values are the headers used in the report.
# NOTE(review): the "placeholder" entries are keyed by the intended report
# header with the literal 'N/A' as their value, so they do not rename anything;
# together they only produce one scratch column that is itself named 'N/A'
# (a later cell drops it). When real source fields become available, replace
# each entry with '<source column>': '<report header>'.
final_columns_map = {
    'acctnbr': 'Account Number',
    'contractdate': 'Loan Origination Date',
    'ownersortname': 'Applicant Last Name', # Needs splitting
    # Placeholder columns
    'Applicant First Name': 'N/A',
    'Co-Applicant Last Name': 'N/A',
    'Co-Applicant First Name': 'N/A',
    'Applicant Credit Score': 'N/A',
    'Co-Applicant Credit Score': 'N/A',
    'Model Year': 'N/A',
    'Vehicle Mileage': 'N/A',
    'Dealer Name': 'N/A',
    'Buy Rate': 'N/A',
    # Mapped columns
    'noteopenamt': 'Amount Financed',
    'notebal': 'Current Balance',
    'noteintrate': 'Contract Rate',
    'Loan Status': 'Loan Paid or Open',
    'closedate': 'Date Closed'
}

# Ordered, de-duplicated report headers. Selecting the raw dict values would
# request the label 'N/A' nine times and yield nine identically named columns.
report_columns = list(dict.fromkeys(final_columns_map.values()))

if indirect_loans_df.empty:
    # Keep final_report_df defined so the following cells do not hit a NameError.
    print("No indirect loans to prepare; final report is empty.")
    final_report_df = pd.DataFrame(columns=report_columns)
else:
    print("Preparing final data for export...")

    # 1. Derive the 'Loan Paid or Open' status: status codes CLS and CO are
    #    reported as closed/charged off, everything else as open.
    is_closed = indirect_loans_df['curracctstatcd'].isin(['CLS', 'CO'])
    indirect_loans_df['Loan Status'] = 'Open'
    indirect_loans_df.loc[is_closed, 'Loan Status'] = 'Closed/Charged Off'

    # 2. Add 'contract_year' for splitting into tabs.
    #    NOTE(review): this helper is not one of report_columns, so it stays on
    #    indirect_loans_df and is not carried into final_report_df.
    indirect_loans_df['contract_year'] = indirect_loans_df['contractdate'].dt.year

    # 3. Create a placeholder column for every report header that neither
    #    exists already nor results from the rename, so the selection below
    #    cannot fail. The rename is computed once instead of once per header.
    headers_after_rename = set(indirect_loans_df.rename(columns=final_columns_map).columns)
    for header in report_columns:
        if header not in indirect_loans_df.columns and header not in headers_after_rename:
            indirect_loans_df[header] = 'N/A'

    # TODO: Split 'ownersortname' into First and Last names
    # For now, we are just renaming it. You'll need to parse this field.

    # Select and rename columns
    report_df = indirect_loans_df.rename(columns=final_columns_map)
    final_report_df = report_df[report_columns]

In [None]:
# Display the assembled report frame as this cell's output.
# NOTE(review): the frame carries 'Account Number' and 'Applicant Last Name'
# columns, so consider clearing this output before sharing the notebook.
final_report_df

In [None]:
# Remove the scratch placeholder column(s) literally labelled 'N/A'.
# errors='ignore' keeps the cell re-runnable: the frame is reassigned to its
# own result, so on a second execution the label is already gone and a plain
# drop would raise KeyError.
final_report_df = final_report_df.drop(columns=['N/A'], errors='ignore').copy()

In [None]:
# Destination of the intermediate parquet extract. The REPORT_TEMP_OUTPUT
# environment variable overrides the location; when it is unset the original
# hard-coded path is used, so existing runs behave as before.
TEMP_OUTPUT = Path(
    os.environ.get(
        'REPORT_TEMP_OUTPUT',
        r"C:\Users\w322800\Documents\gh\bcsb-prod\Reports\Indirect Lending\Adhoc_Pricing_Disparity_20250822\output\output.parquet",
    )
)
# Make sure the target folder exists before writing.
TEMP_OUTPUT.parent.mkdir(parents=True, exist_ok=True)
final_report_df.to_parquet(TEMP_OUTPUT, index=False)

In [None]:
output_filename = 'indirect_loan_report_by_year_2020-2024.xlsx'

# Year used to split rows across tabs. final_report_df may not carry the
# helper 'contract_year' column, in which case indexing it raises KeyError;
# fall back to deriving the year from the origination date.
if 'contract_year' in final_report_df.columns:
    contract_years = final_report_df['contract_year']
else:
    contract_years = pd.to_datetime(final_report_df['Loan Origination Date']).dt.year

# The helper column is never exported; dropping it is a no-op when absent.
export_df = final_report_df.drop(columns=['contract_year'], errors='ignore')

with pd.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
    print(f"\nWriting data to {output_filename}...")

    # Get the unique years from the data, sorted (rows without a year are skipped)
    years_to_export = sorted(contract_years.dropna().unique())

    for year in years_to_export:
        # int() keeps the tab name as e.g. '2020' even if the year is stored as a float
        sheet_name = str(int(year))
        print(f" - Writing tab: {sheet_name}")

        # contract_years shares export_df's index, so the mask aligns row-for-row
        df_year = export_df[contract_years == year]

        df_year.to_excel(writer, sheet_name=sheet_name, index=False)

print("\nReport generation complete.")