In [None]:
import os
import sys
from pathlib import Path

# Locate the project root (equivalent to cd ..).
# A script resolves it from __file__. A notebook kernel starts in the
# notebook's own folder, so the root is one level up. Because this cell calls
# os.chdir, re-running it would otherwise climb one more level every time; when
# the working directory already contains "src" it is treated as the root and
# left unchanged, which keeps the cell idempotent.
if '__file__' in globals():
    project_dir = Path(__file__).parent.parent
elif (Path.cwd() / "src").is_dir():
    project_dir = Path.cwd()
else:
    project_dir = Path.cwd().parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import pandas as pd
import numpy as np

In [None]:
import cdutils.acct_file_creation.core
from datetime import datetime

In [None]:
# Load the account-level DataFrame from cdutils. No arguments are passed, so
# presumably the query date falls back to the function's default - confirm.
df = cdutils.acct_file_creation.core.query_df_on_date()

In [None]:
df

In [None]:
# Fetch wh_org data
# The result is indexed by table name; this notebook reads its 'wh_org' and
# 'vieworgtaxid' entries.
import src.non_profit_list.fetch_data
org_data = src.non_profit_list.fetch_data.fetch_data()

In [None]:
import cdutils.deduplication

In [None]:
# Ask cdutils to de-duplicate the organisation table using the 'orgnbr' field
# and store the result back into org_data. Skipped when no 'wh_org' entry was
# fetched.
if 'wh_org' in org_data:
    dedupe_list = [dict(df=org_data['wh_org'], field='orgnbr')]
    org_data['wh_org'] = cdutils.deduplication.dedupe(dedupe_list)

In [None]:
# Work on a copy so later edits do not modify the table held in org_data.
wh_org = org_data['wh_org'].copy()

In [None]:
# Guard: every orgnbr must appear exactly once in the organisation table.
assert not wh_org['orgnbr'].duplicated().any(), "Not unique"

In [None]:
wh_org

In [None]:
# Genesis export of non-profit records.
# NOTE(review): this is an absolute, workstation-specific path.
INPUT_PATH_GENESIS = Path(
    r"C:\Users\w322800\Documents\gh\bcsb-prod\Reports\Community Relations\Non Profit List\input\Non-profit Organizations - Records - 8-18-2025.xlsx"
)
# header=1: column names are read from the second row of the sheet.
genesis_data = pd.read_excel(io=INPUT_PATH_GENESIS, header=1)

In [None]:
genesis_data

In [None]:
# Start the non-profit working table from a copy of the organisation table.
non_profits = wh_org.copy()

In [None]:
non_profits.info()

In [None]:
# Keep only the organisation identifier, name and type columns.
non_profits = non_profits.loc[
    :, ['orgnbr', 'orgname', 'orgtypcd', 'orgtypcddesc']
].copy()

In [None]:
# Aggregate stats (total loans/deposits) per orgnbr.
# Organisations only: keep the accounts that have no taxrptforpersnbr value.
has_no_person = df['taxrptforpersnbr'].isnull()
acct_orgs = df.loc[has_no_person].copy()

In [None]:
import numpy as np

In [None]:
## Loan/deposit categorisation
# Major account type code -> reporting category. Codes that are not listed
# map to NaN in the new column.
ACCOUNT_TYPE_MAPPING = dict(
    CML='Commercial Loan',
    MLN='Commercial Loan',
    CNS='Consumer Loan',
    MTG='Residential Loan',
    CK='Checking',
    SAV='Savings',
    TD='CD',
)

acct_orgs['Account Type'] = acct_orgs['mjaccttypcd'].map(ACCOUNT_TYPE_MAPPING)

In [None]:
# Drop accounts whose major type code had no entry in the mapping.
acct_orgs = acct_orgs[acct_orgs['Account Type'].notna()].copy()

In [None]:
acct_orgs

In [None]:
# Major account type code -> 'Loan' or 'Deposit'.
MACRO_TYPE_MAPPING = {
    **dict.fromkeys(('CML', 'MLN', 'CNS', 'MTG'), 'Loan'),
    **dict.fromkeys(('CK', 'SAV', 'TD'), 'Deposit'),
}

acct_orgs['Macro Account Type'] = acct_orgs['mjaccttypcd'].map(MACRO_TYPE_MAPPING)

In [None]:
# One row per organisation and, for each macro account type, the summed
# 'Net Balance' and the number of distinct account numbers. Missing
# combinations are filled with 0.
summary_df = pd.pivot_table(
    acct_orgs,
    index='taxrptfororgnbr',
    columns='Macro Account Type',
    aggfunc={'Net Balance': 'sum', 'acctnbr': 'nunique'},
    fill_value=0,
)

In [None]:
# Flatten the two-level pivot columns into '<value>_<macro type>' names.
# Only done while the columns are still a MultiIndex: on already-flat string
# columns, '_'.join would insert '_' between every character, so re-running
# this cell would corrupt the names.
if isinstance(summary_df.columns, pd.MultiIndex):
    summary_df.columns = ['_'.join(col) for col in summary_df.columns]

In [None]:
# Move the pivot index (taxrptfororgnbr) back into a regular column.
summary_df = summary_df.reset_index()

In [None]:
# Convert the key to a string by way of int, so it can be merged with the
# string-typed orgnbr key used for the non-profit table.
summary_df['taxrptfororgnbr'] = summary_df['taxrptfororgnbr'].astype(int).astype(str)

In [None]:
summary_df

In [None]:
summary_df.info()

In [None]:
# Give the flattened pivot columns report-friendly names. The key column
# 'taxrptfororgnbr' keeps its name, so it is not listed in the mapping, and
# rename already returns a new DataFrame, so no extra copy is needed.
summary_df = summary_df.rename(columns={
    'Net Balance_Deposit': 'Deposit Balance',
    'Net Balance_Loan': 'Loan Balance',
    'acctnbr_Deposit': 'Unique Deposit Accounts',
    'acctnbr_Loan': 'Unique Loan Accounts',
})

In [None]:
summary_df

In [None]:
# Per-organisation descriptive fields: the first city, state and branch seen
# for the organisation, plus the minimum contractdate across its accounts.
entity_details = (
    acct_orgs
    .groupby('taxrptfororgnbr')
    .agg(
        primaryownercity=pd.NamedAgg(column='primaryownercity', aggfunc='first'),
        primaryownerstate=pd.NamedAgg(column='primaryownerstate', aggfunc='first'),
        branchname=pd.NamedAgg(column='branchname', aggfunc='first'),
        earliest_opendate=pd.NamedAgg(column='contractdate', aggfunc='min'),
    )
    .reset_index()
)

In [None]:
entity_details

In [None]:
# Convert the key to a string by way of int, the same conversion applied to
# summary_df, so the two tables can be merged on it.
entity_details['taxrptfororgnbr'] = entity_details['taxrptfororgnbr'].astype(int).astype(str)

In [None]:
# Combine the descriptive fields with the balance/count summary, keeping only
# organisations present in both tables.
summary_df = entity_details.merge(summary_df, how='inner', on='taxrptfororgnbr')

In [None]:
# Cast the join key to string to match summary_df's string key.
# NOTE(review): unlike the taxrptfororgnbr casts, this one does not go through
# int; if orgnbr is stored as float the strings would end in ".0" and fail to
# match - confirm the dtype.
non_profits['orgnbr'] = non_profits['orgnbr'].astype(str)

In [None]:
# Attach the account summary to each organisation; organisations without a
# matching summary row are dropped by the inner join.
merged_df = non_profits.merge(
    summary_df,
    how='inner',
    left_on='orgnbr',
    right_on='taxrptfororgnbr',
)

In [None]:
# The right-hand join key duplicates orgnbr after the merge; remove it.
merged_df = merged_df.drop('taxrptfororgnbr', axis='columns').copy()

In [None]:
# Total account count = distinct deposit accounts + distinct loan accounts.
merged_df['Total Accounts'] = merged_df['Unique Deposit Accounts'].add(
    merged_df['Unique Loan Accounts']
)

In [None]:
# Order organisations from most to fewest accounts.
merged_df = merged_df.sort_values('Total Accounts', ascending=False).copy()

In [None]:
merged_df

In [None]:
# Append taxid: take a working copy of the 'vieworgtaxid' table from org_data.
vieworgtaxid = org_data['vieworgtaxid'].copy()

In [None]:
vieworgtaxid

In [None]:
# Guard: one tax-id row per orgnbr, so the left merge cannot multiply rows.
assert not vieworgtaxid['orgnbr'].duplicated().any(), "Duplicates"

In [None]:
# Only the key and the tax id are needed for the merge.
vieworgtaxid = vieworgtaxid.loc[:, ['orgnbr', 'taxid']].copy()

In [None]:
# Cast the join key to string so it matches merged_df's string orgnbr.
vieworgtaxid['orgnbr'] = vieworgtaxid['orgnbr'].astype(str)

In [None]:
# Left join: every organisation is kept, with taxid filled where one exists.
merged_df = merged_df.merge(vieworgtaxid, how='left', on='orgnbr')

In [None]:
merged_df.info()

In [None]:
merged_df

In [None]:
genesis_data.info()

In [None]:
genesis_data

In [None]:
# Normalise the Genesis tax IDs by removing the dash separators, so they can be
# matched against the warehouse taxid values.
# Series.replace("-", "") only rewrites cells whose entire value is "-"; the
# .str accessor is needed to strip dashes inside each value.
# Assumes 'Tax ID' holds strings - TODO confirm.
genesis_data['tax_id_clean'] = genesis_data['Tax ID'].str.replace("-", "", regex=False)

In [None]:

# Ask cdutils to de-duplicate the Genesis records using the cleaned tax id.
dedupe_list = [dict(df=genesis_data, field='tax_id_clean')]
genesis_data = cdutils.deduplication.dedupe(dedupe_list)

In [None]:
# Organisations whose warehouse taxid also appears in the Genesis list.
check_df = merged_df.merge(
    genesis_data,
    how='inner',
    left_on='taxid',
    right_on='tax_id_clean',
)

In [None]:
check_df

In [None]:
# Will turn into formal pipeline after getting feedback from business line
