In [None]:
import os
import sys
from pathlib import Path

# Navigate to project root (equivalent to cd ..).
# When run as a script, the root is two levels above this file. In a notebook
# there is no __file__, so fall back to the working directory and climb one
# level only if the current directory does not already contain "src".
# Without that check, re-running this cell after the chdir below would move
# one directory higher on every execution.
if '__file__' in globals():
    # resolve() so a relative __file__ still yields the real grandparent directory
    project_dir = Path(__file__).resolve().parent.parent
else:
    _cwd = Path.cwd()
    project_dir = _cwd if (_cwd / "src").is_dir() else _cwd.parent
os.chdir(project_dir)

# Add src directory to Python path for imports (guarded so re-runs do not add duplicates)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import src.config
import pandas
from deltalake import DeltaTable
from pathlib import Path
import pandas as pd

In [None]:
# Core logic specific to project/report

import pandas as pd
from pathlib import Path
from deltalake import DeltaTable
import src.config

# def main_report_creation():
# Read the Lakehouse Delta tables into pandas DataFrames
def _read_delta(table_path):
    """Load the Delta table stored at ``table_path`` as a pandas DataFrame."""
    return DeltaTable(table_path).to_pandas()


# Account table comes from the SILVER location; the two loan tables from BRONZE
df = _read_delta(src.config.SILVER / "account")
wh_acctloan = _read_delta(src.config.BRONZE / "wh_acctloan")
wh_loans = _read_delta(src.config.BRONZE / "wh_loans")



In [None]:
# Inspect the account table as loaded, before any filtering
df

In [None]:
# Need to get rtxn & wh_totalpaymentsdue

# NOTE: this cell overwrites df, wh_acctloan and wh_loans with their trimmed
# versions, so re-running it requires re-running the load cell first.

# Filter to SBA loans (case-insensitive match on product; rows with a missing
# product are excluded), keep only the report columns, and apply report headings.
df = (
    df.loc[
        df['product'].str.contains('SBA', case=False, na=False),
        [
            'acctnbr',
            'product',
            'curracctstatcd',
            'ownersortname',
            'noteintrate',
            'Net Balance',
        ],
    ]
    .rename(columns={
        'noteintrate': 'Interest Rate (%)',
        'Net Balance': 'Closing Balance',
    })
    # Cast the join key to str, exactly as done for both lookup tables below,
    # so all three frames merge on the same key dtype.
    .astype({'acctnbr': str})
)

# Next installment due date per account
wh_acctloan = (
    wh_acctloan[['acctnbr', 'currduedate']]
    .assign(
        acctnbr=lambda t: t['acctnbr'].astype(str),
        currduedate=lambda t: pd.to_datetime(t['currduedate']),
    )
    .rename(columns={'currduedate': 'Next Installment Due Date (MM/DD/YYYY)'})
)

# Interest-paid-to date per account (left as loaded; no datetime conversion is applied)
wh_loans = (
    wh_loans[['acctnbr', 'intpaidtodate']]
    .assign(acctnbr=lambda t: t['acctnbr'].astype(str))
    .rename(columns={'intpaidtodate': 'Interest Period To (MM/DD/YYYY)'})
)

# Left joins keep every SBA account even when a lookup table has no match.
# NOTE(review): assumes at most one row per acctnbr in wh_acctloan / wh_loans;
# duplicates there would fan out account rows — confirm.
merged_df = (
    df.merge(wh_acctloan, on='acctnbr', how='left')
      .merge(wh_loans, on='acctnbr', how='left')
)



In [None]:
# Inspect the SBA accounts joined with the due-date and interest-period columns
merged_df

In [None]:
# FIXME(review): `data` is first assigned by the later `data = fetch_rtxn()` cell, so on a
# fresh kernel this line raises NameError. fetch_rtxn() as written only submits a
# 'wh_rtxnbal' query, so a 'wh_totalpaymentsdue' entry is presumably never returned —
# confirm where this table is meant to come from. `payments` is also reassigned by the
# later SPMT aggregation cell.
payments = data['wh_totalpaymentsdue'].copy()

In [None]:
# Inspect the copied 'wh_totalpaymentsdue' data
payments

In [None]:
# Inspect the trimmed wh_acctloan lookup (acctnbr plus next installment due date)
wh_acctloan

In [None]:

"""
Data-fetching module. The aim is to import all necessary fields up front, but if needed, you can define another function to be called here.

Usage:
    import src.cdutils.database

Set the date you want to use as the effective date embedded in the SQL query.
"""

import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime, timedelta
from typing import Optional, Tuple

# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating
# This is an oracle DB so SQL syntax should match oracle.

def _get_trailing_month_dates() -> Tuple[datetime.date, datetime.date]:
    """
    Calculates the start and end date of the previous full month.
    
    Returns:
        A tuple containing the start date and end date of the trailing month.
    """
    today = datetime.today()
    # 1. Get the first day of the current month
    first_day_of_current_month = today.replace(day=1)
    # 2. Subtract one day to get the last day of the previous month
    end_of_trailing_month = first_day_of_current_month - timedelta(days=1)
    # 3. Get the first day of that previous month
    start_of_trailing_month = end_of_trailing_month.replace(day=1)
    
    return start_of_trailing_month, end_of_trailing_month


def fetch_rtxn(include_rtxn: bool = False):
    """
    Fetch trailing-month transaction data from the COCCDM warehouse tables.

    By default only COCCDM.WH_RTXNBAL is queried, returning every row whose
    RUNDATE falls within the previous full calendar month. No transaction-type
    or status filter is applied in SQL for this table.

    Args:
        include_rtxn: When True, also query COCCDM.WH_RTXN for the same date
            window, restricted to RTXNSTATCD = 'C', under the key 'wh_rtxn'.
            Defaults to False, which submits only the 'wh_rtxnbal' query.

    Returns:
        Whatever ``cdutils.database.connect.retrieve_data`` returns for the
        submitted queries (presumably a mapping from each query's 'key' to its
        result — confirm against cdutils).
    """
    # Define start & end dates for the trailing month
    start_date, end_date = _get_trailing_month_dates()

    # Bind values are built once and shared by every query.
    # NOTE: Using bind parameters (:start_date, :end_date) is safer than f-strings.
    date_params = {
        'start_date': start_date.strftime('%Y-%m-%d'),
        'end_date': end_date.strftime('%Y-%m-%d'),
    }

    # NOTE(review): the upper bound is TO_DATE(end_date), i.e. midnight of the last
    # day; if RUNDATE carries a time component, later rows on that day are excluded — confirm.
    wh_rtxnbal = text("""
    SELECT
        a.*
    FROM
        COCCDM.WH_RTXNBAL a
    WHERE
        a.RUNDATE BETWEEN TO_DATE(:start_date, 'YYYY-MM-DD') AND TO_DATE(:end_date, 'YYYY-MM-DD')
    """)

    queries = [
        {
            'key': 'wh_rtxnbal',
            'sql': wh_rtxnbal.bindparams(**date_params),
            'engine': 2
        },
    ]

    if include_rtxn:
        wh_rtxn = text("""
    SELECT
        a.*
    FROM
        COCCDM.WH_RTXN a
    WHERE
        a.RTXNSTATCD = 'C'
        AND a.RUNDATE BETWEEN TO_DATE(:start_date, 'YYYY-MM-DD') AND TO_DATE(:end_date, 'YYYY-MM-DD')
    """)
        # Placed first to match the order the queries were originally listed in
        queries.insert(0, {
            'key': 'wh_rtxn',
            'sql': wh_rtxn.bindparams(**date_params),
            'engine': 2
        })

    data = cdutils.database.connect.retrieve_data(queries)
    return data




In [None]:
# Run the trailing-month warehouse query defined in fetch_rtxn()
data = fetch_rtxn()

In [None]:
# rtxn = data['wh_rtxn'].copy()
# Work on a copy of the 'wh_rtxnbal' result so the cleaning below does not modify `data`
rtxnbal = data['wh_rtxnbal'].copy()

In [None]:
# Inspect the fetched 'wh_rtxnbal' rows
rtxnbal

In [None]:
# Check column dtypes and non-null counts before cleaning 'amt'
rtxnbal.info()

In [None]:
# Make 'amt' numeric in a single pass: values that cannot be parsed become NaN
# (errors='coerce'), and those NaNs are then replaced with 0.
rtxnbal['amt'] = pd.to_numeric(rtxnbal['amt'], errors='coerce').fillna(0)

In [None]:
# Advances: keep only the 'PDSB' transaction rows
is_advance = rtxnbal['rtxntypcd'].eq('PDSB')
advances_raw = rtxnbal.loc[is_advance].copy()

# Total 'amt' per account, label the total 'Advances', and cast the
# account number to str so it can be used as a merge key
df_advances = (
    advances_raw
    .groupby('acctnbr')['amt']
    .sum()
    .rename('Advances')
    .reset_index()
    .astype({'acctnbr': str})
)

print("\n--- Advances DataFrame (df_advances) ---", df_advances, sep="\n")

In [None]:
# 1. Filter for 'SPMT' transaction types
spmt_raw = rtxnbal[rtxnbal['rtxntypcd'] == 'SPMT'].copy()

# 2. Group by account and balance type, then sum the amounts
payments = spmt_raw.groupby(['acctnbr', 'baltypcd'])['amt'].sum()

# 3. Unstack the 'baltypcd' level to turn 'BAL' and 'INT' into columns
df_payments = payments.unstack(level='baltypcd').fillna(0)

# 4. Guarantee both expected balance-type columns exist. Later cells select
#    'Principal Paid' and 'Interest Paid' by name and would raise KeyError if
#    no SPMT row in the period carried one of these balance types.
for _baltyp in ('BAL', 'INT'):
    if _baltyp not in df_payments.columns:
        df_payments[_baltyp] = 0.0

df_payments = df_payments.reset_index()

# 5. Rename columns for clarity
df_payments = df_payments.rename(columns={
    'BAL': 'Principal Paid',
    'INT': 'Interest Paid'
})

# 6. Convert acctnbr to string for merging
df_payments['acctnbr'] = df_payments['acctnbr'].astype(str)

print("\n--- Combined Payments DataFrame ---")
print(df_payments)

In [None]:
# Rich display of the per-account advances totals
df_advances

In [None]:
# Rich display of the per-account payment totals
df_payments

In [None]:
# Optional: split the combined payments table into two independent frames,
# each keyed by account number
df_principal = df_payments.loc[:, ['acctnbr', 'Principal Paid']].copy()
df_interest = df_payments.loc[:, ['acctnbr', 'Interest Paid']].copy()

print("\n--- Principal Paid DataFrame (df_principal) ---", df_principal, sep="\n")
print("\n--- Interest Paid DataFrame (df_interest) ---", df_interest, sep="\n")

In [None]:
# The join key on the account side must be a string as well
merged_df['acctnbr'] = merged_df['acctnbr'].astype(str)

# Left-join advances first, then the combined payments, so every account in
# merged_df is kept even when it has no activity
final_df = (
    merged_df
    .merge(df_advances, on='acctnbr', how='left')
    .merge(df_payments, on='acctnbr', how='left')
)

# Accounts without matching advances/payments come back as NaN; report them as 0
amount_cols = ['Advances', 'Principal Paid', 'Interest Paid']
final_df[amount_cols] = final_df[amount_cols].fillna(0)


print("\n--- Final Merged DataFrame ---")
print(final_df)

In [None]:
# Rich display of the final report table
final_df