In [None]:
import os
import sys
from pathlib import Path

def resolve_project_root(start):
    """
    Return the project root for a notebook whose working directory is `start`.

    If `start` already contains a ``src`` directory it is taken to be the
    project root and returned unchanged; otherwise its parent is returned.
    This keeps the setup cell idempotent: re-running it after the first
    ``os.chdir`` no longer walks one directory further up each time.
    """
    start = Path(start)
    return start if (start / "src").is_dir() else start.parent


# Navigate to project root (equivalent to cd ..).
# When run as a script, __file__ pins the location; in a notebook kernel
# __file__ is undefined, so fall back to the current working directory.
project_dir = (
    Path(__file__).parent.parent
    if '__file__' in globals()
    else resolve_project_root(Path.cwd())
)
os.chdir(project_dir)

# Add src directory to Python path for imports (guarded so re-runs don't duplicate it)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import pandas as pd
import numpy as np

In [None]:
import cdutils.acct_file_creation.core
from datetime import datetime

In [None]:
# Load the account-level DataFrame. No date argument is passed, so
# query_df_on_date() runs with its own defaults — TODO confirm which as-of date that is.
df = cdutils.acct_file_creation.core.query_df_on_date()

In [None]:
# Preview the account data returned by query_df_on_date().
df

In [None]:
# Fetch the raw source tables. Later cells read 'wh_org', 'wh_addr' and
# 'orgaddruse' from the returned mapping, so this covers more than wh_org.
import src.smb_campaign.fetch_data
raw_data = src.smb_campaign.fetch_data.fetch_data()

In [None]:
import cdutils.deduplication

In [None]:
# Dedupe org table on its key column 'orgnbr'.
# dedupe() receives a list of {'df', 'field'} specs; with a single spec its
# result is stored back as the org table — presumably the de-duplicated frame; confirm.
# NOTE(review): this membership guard only protects this cell — the next cell
# indexes raw_data['wh_org'] unconditionally and would raise KeyError anyway.
if 'wh_org' in raw_data:
    dedupe_list = [{'df': raw_data['wh_org'], 'field': 'orgnbr'}]
    raw_data['wh_org'] = cdutils.deduplication.dedupe(dedupe_list)

In [None]:
# Work on a copy so later edits to wh_org do not touch the frame held in raw_data.
wh_org = raw_data['wh_org'].copy()

In [None]:
# Sanity check: orgnbr must be unique here, because it is used further down
# as the left-hand key when org rows are merged with per-org account details.
assert wh_org['orgnbr'].is_unique, "Not unique"

In [None]:
# Preview the org table after de-duplication.
wh_org

In [None]:
# List the distinct org type codes present (the next cell excludes 'BRCH' and 'BANK').
wh_org['orgtypcd'].unique()

In [None]:
# Keep every org whose type code is neither 'BRCH' nor 'BANK'
# (rows with a missing type code are kept as well).
filtered_org = wh_org.loc[~wh_org['orgtypcd'].isin(['BRCH', 'BANK'])].copy()

In [None]:
# Summarise columns, dtypes and non-null counts of the filtered org table.
filtered_org.info()

In [None]:
# Narrow the org table to its identifier, name and type columns.
filtered_org = filtered_org.loc[
    :, ['orgnbr', 'orgname', 'orgtypcd', 'orgtypcddesc']
].copy()

In [None]:
# First step toward per-org aggregate stats (total loans/deposits):
# keep organisation accounts only, i.e. rows with no tax-reporting person number.
acct_orgs = df.loc[df['taxrptforpersnbr'].isna()].copy()

In [None]:
import numpy as np

In [None]:
## Loan/deposit categorisation
# Major account type code -> descriptive account type label
ACCOUNT_TYPE_MAPPING = dict(
    CML='Commercial Loan',
    MLN='Commercial Loan',
    CNS='Consumer Loan',
    MTG='Residential Loan',
    CK='Checking',
    SAV='Savings',
    TD='CD',
)

# Codes that are not in the mapping come out as NaN in the new column.
acct_orgs['Account Type'] = acct_orgs['mjaccttypcd'].map(ACCOUNT_TYPE_MAPPING)

In [None]:
# Drop accounts whose major type code had no entry in ACCOUNT_TYPE_MAPPING.
acct_orgs = acct_orgs.loc[acct_orgs['Account Type'].notna()].copy()

In [None]:
# Preview the organisation accounts that have a mapped account type.
acct_orgs

In [None]:
# Major account type code -> coarse 'Loan' / 'Deposit' bucket
MACRO_TYPE_MAPPING = {
    **dict.fromkeys(('CML', 'MLN', 'CNS', 'MTG'), 'Loan'),
    **dict.fromkeys(('CK', 'SAV', 'TD'), 'Deposit'),
}

# Codes that are not in the mapping come out as NaN in the new column.
acct_orgs['Macro Account Type'] = acct_orgs['mjaccttypcd'].map(MACRO_TYPE_MAPPING)

In [None]:
# Per-org details taken from the account rows: the first non-null owner
# city/state in each group and the earliest contract date.
entity_details = (
    acct_orgs
    .groupby('taxrptfororgnbr')
    .agg(
        primaryownercity=pd.NamedAgg(column='primaryownercity', aggfunc='first'),
        primaryownerstate=pd.NamedAgg(column='primaryownerstate', aggfunc='first'),
        earliest_opendate=pd.NamedAgg(column='contractdate', aggfunc='min'),
    )
    .reset_index()
)

In [None]:
# Preview the per-org details (one row per taxrptfororgnbr).
entity_details

In [None]:
# Normalise the merge key to a string. Casting to int first removes any
# float formatting (e.g. 123.0 -> '123'); astype(int) raises if a value is missing.
entity_details['taxrptfororgnbr'] = entity_details['taxrptfororgnbr'].astype(int).astype(str)

In [None]:
# Cast the org-side merge key to string so it can be matched against
# entity_details['taxrptfororgnbr'].
# NOTE(review): the other key is normalised via int -> str; if orgnbr were
# float-typed this plain str cast would yield '123.0' and never match — confirm dtype.
filtered_org['orgnbr'] = filtered_org['orgnbr'].astype(str)

In [None]:
# Inner join: keep only orgs that have per-org account details.
merged_df = filtered_org.merge(
    entity_details,
    how='inner',
    left_on='orgnbr',
    right_on='taxrptfororgnbr',
)

In [None]:
# Drop the right-hand merge key, which duplicates 'orgnbr' after the join.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError. drop() already returns
# a new frame, so no extra .copy() is needed.
merged_df = merged_df.drop(columns=['taxrptfororgnbr'], errors='ignore')

In [None]:
# Preview the orgs joined with their per-org account details.
merged_df

In [None]:
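# Next: attach address information to each org. This needs two tables:
#   ORGADDRUSE - links an org (orgnbr) to an address (addrnbr) for a given use code
#   WH_ADDR    - the address records themselves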

In [None]:
# Take independent copies of the two address tables so raw_data stays untouched.
wh_addr, orgaddruse = (
    raw_data[table_name].copy() for table_name in ('wh_addr', 'orgaddruse')
)

In [None]:
# List the distinct line-type descriptions found on address line 1.
wh_addr['addrlinetypdesc1'].unique()

In [None]:
def _join_address_parts(parts):
    """Join aligned Series row by row with single spaces, skipping nulls; returns a list of str."""
    rows = zip(*(part.tolist() for part in parts))
    return [
        ' '.join(str(value) for value in row if pd.notna(value))
        for row in rows
    ]


def create_full_street_address(df):
    """
    Build one street-address string per address record.

    For each of the three address lines (``text1``..``text3``) the line's text
    is kept when the matching ``addrlinetypdesc{i}`` is a street-level type
    (street, apartment, building, suite or room number). Type descriptions are
    compared case-insensitively after trimming whitespace. The kept parts are
    joined with single spaces in line order. Records without any street part
    fall back to their PO-box line(s).

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``addrnbr``, ``cityname``, ``statecd``, ``zipcd`` and, for
        i in 1..3, ``text{i}`` and ``addrlinetypdesc{i}``.

    Returns
    -------
    pandas.DataFrame
        Columns ``addrnbr``, ``Full_Street_Address``, ``cityname``,
        ``statecd``, ``zipcd``; one row per input row, with the input's index.
        ``Full_Street_Address`` is ``''`` when no line qualifies.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    """

    STREET_TYPES = {
        'street', 'apartment number','building number', 'suite number', 'room number'
    }
    POBOX_TYPE = 'post office box number'

    street_parts = []
    pobox_parts = []
    for i in (1, 2, 3):
        line_text = df[f'text{i}']

        # Normalise the type description without the .str accessor, which
        # raises on a column that is entirely null (float dtype).
        line_type = df[f'addrlinetypdesc{i}'].map(
            lambda value: value.strip().lower() if isinstance(value, str) else None
        )

        # .where() keeps the text where the mask is True and yields NaN
        # elsewhere; NaN parts are skipped when the address is joined.
        street_parts.append(line_text.where(line_type.isin(STREET_TYPES)))
        pobox_parts.append(line_text.where(line_type.isin([POBOX_TYPE])))

    street_address = _join_address_parts(street_parts)
    pobox_address = _join_address_parts(pobox_parts)

    # Street address and PO box are treated as mutually exclusive: the PO box
    # is only used when the record has no street part at all.
    # NOTE(review): PO-box text is used verbatim; whether it already carries a
    # "PO Box" prefix depends on the source data — confirm.
    full_address = [
        street or pobox for street, pobox in zip(street_address, pobox_address)
    ]

    # Assemble the extract on a copy so the caller's frame gains no columns.
    df_clean = df[['addrnbr', 'cityname', 'statecd', 'zipcd']].copy()
    df_clean.insert(1, 'Full_Street_Address', full_address)

    return df_clean

In [None]:
# Reduce wh_addr to addrnbr, a combined Full_Street_Address, and city/state/zip.
cleaned_addr = create_full_street_address(wh_addr)

In [None]:
# Preview the cleaned address extract.
cleaned_addr

In [None]:
# Preview the org-to-address usage table before filtering by use code.
orgaddruse

In [None]:
# Keep only rows whose address use code is 'PRI'.
orgaddruse = orgaddruse.loc[orgaddruse['addrusecd'].isin(['PRI'])].copy()

In [None]:
# Cast the join keys on both tables to str so the address merge compares like with like.
orgaddruse = orgaddruse.astype({'orgnbr': str, 'addrnbr': str})

cleaned_addr = cleaned_addr.astype({'addrnbr': str})

In [None]:
# Inner join: attach the cleaned address to each remaining org/address link.
merged_address = orgaddruse.merge(cleaned_addr, how='inner', on='addrnbr')

In [None]:
# Preview the org/address links joined with their cleaned addresses.
merged_address

In [None]:
# Re-inspect the raw address table for comparison with the cleaned extract.
wh_addr

In [None]:
# TODO: turn this notebook into a formal pipeline once the business line has given feedback.
