In [None]:
import os
import sys
from pathlib import Path

# Resolve the project root, i.e. the directory that contains "src".
# On a fresh kernel the working directory sits one level below the root, so we
# step up once (equivalent to `cd ..`). Checking for "src" first keeps this
# cell idempotent: re-running it after the chdir below must not climb one
# directory higher on every execution.
start_dir = Path(__file__).resolve().parent if '__file__' in globals() else Path.cwd()
project_dir = start_dir if (start_dir / "src").is_dir() else start_dir.parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Select the reporting environment through the REPORT_ENV variable.
# NOTE(review): this was previously described as "dev testing" while the value
# set is 'prod' — confirm which environment is intended before running.
os.environ['REPORT_ENV'] = 'prod'

In [None]:
import src.config
from deltalake import DeltaTable
from pathlib import Path
import pandas as pd


In [None]:
# Read the bronze "metadata_lookup_engine1" Delta table into a DataFrame.
TABLE_PATH = src.config.BRONZE / "metadata_lookup_engine1"
metadata_delta = DeltaTable(TABLE_PATH)
metadata = metadata_delta.to_pandas()

In [None]:
# Display the metadata lookup frame for inspection.
metadata

In [None]:
# Point TABLE_PATH at the silver-layer "account" table; it is loaded in the next cell.
TABLE_PATH = src.config.SILVER / "account"


In [None]:
# Materialise the Delta table at TABLE_PATH as the `account` DataFrame.
account_delta = DeltaTable(TABLE_PATH)
account = account_delta.to_pandas()

In [None]:
# Display the raw account frame for inspection.
account

In [None]:
# Keep only accounts whose major account type code is one of the listed values.
major_types_in_scope = ['CML','MTG','MLN']
is_in_scope = account['mjaccttypcd'].isin(major_types_in_scope)
account_cleaned = account.loc[is_in_scope].copy()

In [None]:
# Display the filtered accounts.
account_cleaned

In [None]:
# Load the person and organisation address-use tables (bronze layer); they are
# used to find the primary address of the org/pers tax owner.
TABLE_PATH = src.config.BRONZE / "persaddruse"
persaddruse_delta = DeltaTable(TABLE_PATH)
persaddruse = persaddruse_delta.to_pandas()

TABLE_PATH = src.config.BRONZE / "orgaddruse"
orgaddruse_delta = DeltaTable(TABLE_PATH)
orgaddruse = orgaddruse_delta.to_pandas()

In [None]:
# Keep only the rows whose address-use code is 'PRI'.
pers_is_primary = persaddruse['addrusecd'].eq('PRI')
persaddruse = persaddruse.loc[pers_is_primary].copy()

org_is_primary = orgaddruse['addrusecd'].eq('PRI')
orgaddruse = orgaddruse.loc[org_is_primary].copy()


In [None]:
# Check column dtypes and non-null counts after the 'PRI' filter.
persaddruse.info()

In [None]:
# Trim each lookup to its entity key plus the address number.
pers_keep = ['persnbr', 'addrnbr']
persaddruse = persaddruse.loc[:, pers_keep].copy()

org_keep = ['orgnbr', 'addrnbr']
orgaddruse = orgaddruse.loc[:, org_keep].copy()

In [None]:
# Cast the join-key columns of both lookups to str, one column at a time.
# NOTE(review): assumes str() of these values matches the '{:.0f}'-formatted
# owner ids on the account side — confirm against the dtypes shown by .info().
for key_col in ('persnbr', 'addrnbr'):
    persaddruse[key_col] = persaddruse[key_col].astype(str)

for key_col in ('orgnbr', 'addrnbr'):
    orgaddruse[key_col] = orgaddruse[key_col].astype(str)


In [None]:
# Re-check dtypes after the string cast.
persaddruse.info()

In [None]:
# Check dtypes of the organisation lookup after the string cast.
orgaddruse.info()

In [None]:
# Format the tax-reporting owner ids with zero decimals (e.g. 123.0 -> '123')
# so they can be joined to the string keys of the address-use lookups.
# Missing ids are skipped (na_action='ignore') and stay missing.
#
# Both columns are read from account_cleaned itself, never from the unfiltered
# `account` frame: assigning a Series taken from `account` only lines up by
# index label, which silently misaligns as soon as account_cleaned's index has
# been reset (for example by a merge).
format_id = '{:.0f}'.format
for owner_col in ('taxrptforpersnbr', 'taxrptfororgnbr'):
    account_cleaned[owner_col] = account_cleaned[owner_col].map(format_id, na_action='ignore')


In [None]:
# Step 1: left-join the organisation lookup on the org tax-owner id.
accounts_with_org_addr = pd.merge(
    account_cleaned,
    orgaddruse,
    how='left',
    left_on='taxrptfororgnbr',
    right_on='orgnbr',
)

# Step 2: left-join the person lookup on the person tax-owner id; overlapping
# column names get the '_org' / '_pers' suffixes.
account_cleaned = pd.merge(
    accounts_with_org_addr,
    persaddruse,
    how='left',
    left_on='taxrptforpersnbr',
    right_on='persnbr',
    suffixes=('_org','_pers'),
)

In [None]:
# Single address number per row: take the organisation's value and fall back
# to the person's value where the organisation one is missing.
org_addrnbr = account_cleaned['addrnbr_org']
pers_addrnbr = account_cleaned['addrnbr_pers']
account_cleaned['addrnbr'] = org_addrnbr.fillna(pers_addrnbr)

# The helper columns brought in by the two lookups are no longer needed.
lookup_helper_cols = ['addrnbr_org','orgnbr','addrnbr_pers','persnbr']
account_cleaned = account_cleaned.drop(columns=lookup_helper_cols).copy()

In [None]:
# Display the accounts with the resolved address number.
account_cleaned

In [None]:
# Read the silver "address" Delta table into a DataFrame.
TABLE_PATH = src.config.SILVER / "address"
address_delta = DeltaTable(TABLE_PATH)
address = address_delta.to_pandas()

In [None]:
# Remove the load-timestamp column before joining.
address = address.drop('load_timestamp_utc', axis='columns').copy()

In [None]:
# Left-join the address details onto each account by address number.
account_cleaned = pd.merge(account_cleaned, address, how='left', on='addrnbr')

In [None]:
# Read the silver "property" Delta table into a DataFrame.
# NOTE(review): the name `property` shadows the Python builtin of the same
# name for the rest of the kernel session — consider renaming in a follow-up.
TABLE_PATH = src.config.SILVER / "property"
property_delta = DeltaTable(TABLE_PATH)
property = property_delta.to_pandas()

In [None]:
# Display the property frame for inspection.
property

In [None]:
# Remove the load-timestamp column before joining.
property = property.drop('load_timestamp_utc', axis='columns').copy()

In [None]:
# Account-to-property link table (silver layer).
TABLE_PATH = src.config.SILVER / "account_property_link"
acct_prop_link_delta = DeltaTable(TABLE_PATH)
acct_prop_link = acct_prop_link_delta.to_pandas()

In [None]:
# Display the account/property link frame for inspection.
acct_prop_link

In [None]:
# Remove the load-timestamp column before joining.
acct_prop_link = acct_prop_link.drop('load_timestamp_utc', axis='columns').copy()

In [None]:
# Check dtypes and non-null counts on the account side before joining on 'acctnbr'.
account_cleaned.info()

In [None]:
# Check dtypes and non-null counts on the link side before joining on 'acctnbr'.
acct_prop_link.info()

In [None]:
# Inner-join accounts to their property links (accounts without a link drop out) ...
accounts_with_links = pd.merge(account_cleaned, acct_prop_link, how='inner', on='acctnbr')

# ... then left-join the property details by property number.
merged_df = pd.merge(accounts_with_links, property, how='left', on='propnbr')

In [None]:
# Display the account/property join result.
merged_df

In [None]:
# Build the exclusion mask: rows whose current minor account type is one of
# the listed codes AND whose flood-zone flag is 'N'.
heloc_non_flood_to_exclude = ['MG52','MG55','MG48','MG71']
is_listed_minor_type = merged_df['currmiaccttypcd'].isin(heloc_non_flood_to_exclude)
is_not_in_flood_zone = merged_df['floodzoneyn'] == 'N'
mask_to_exclude = is_listed_minor_type & is_not_in_flood_zone

In [None]:
# Drop the HELOC rows flagged above as not being in a flood zone.
rows_to_keep = ~mask_to_exclude
merged_df = merged_df.loc[rows_to_keep].copy()

In [None]:
# Display the result after the exclusion filter.
merged_df

In [None]:
# Read the silver "insurance" Delta table into a DataFrame.
TABLE_PATH = src.config.SILVER / "insurance"
insurance_delta = DeltaTable(TABLE_PATH)
insurance = insurance_delta.to_pandas()

In [None]:
# Display the insurance frame for inspection.
insurance

In [None]:
# List the distinct values present in 'instypcd'.
insurance['instypcd'].unique()

In [None]:
# Keep every policy whose type code is anything other than "PMI".
is_not_pmi = insurance['instypcd'].ne("PMI")
insurance = insurance.loc[is_not_pmi].copy()

In [None]:
# Print the full (untruncated) column summary to choose which columns to keep.
insurance.info(verbose=True)

In [None]:
# Policy columns carried forward into the final join.
insurance_keep = [
    'intrpolicynbr', 'instypcd', 'instypdesc', 'insorgnbr', 'escrowyn_link',
    'premamt', 'effdate_policy', 'expiredate', 'inactivedate_policy', 'coverageamt',
]
insurance = insurance.loc[:, insurance_keep].copy()

In [None]:
# Check dtypes and non-null counts of the trimmed insurance frame.
insurance.info()

In [None]:
# Cast the insurer organisation number to str so it can be joined to wh_org.
# NOTE(review): assumes str() of this column matches str() of wh_org['orgnbr'] — confirm dtypes.
insorgnbr_as_text = insurance['insorgnbr'].astype(str)
insurance['insorgnbr'] = insorgnbr_as_text

In [None]:
# Read the bronze "wh_org" Delta table into a DataFrame.
TABLE_PATH = src.config.BRONZE / "wh_org"
wh_org_delta = DeltaTable(TABLE_PATH)
wh_org = wh_org_delta.to_pandas()

In [None]:
# Cast the organisation number to str to match the insurance-side join key.
orgnbr_as_text = wh_org['orgnbr'].astype(str)
wh_org['orgnbr'] = orgnbr_as_text

In [None]:
# Only the organisation number and name are needed from wh_org.
wh_org_keep = ['orgnbr','orgname']
wh_org = wh_org.loc[:, wh_org_keep].copy()

In [None]:
# Give the organisation name its report-facing column label.
org_labels = {'orgname':'Insurance Company Name'}
wh_org = wh_org.rename(columns=org_labels).copy()

In [None]:
# Left-join the insurer name onto each policy by organisation number.
insurance = pd.merge(
    insurance,
    wh_org,
    how='left',
    left_on='insorgnbr',
    right_on='orgnbr',
)

In [None]:
# Display the policies with the insurer name attached.
insurance

In [None]:
# The two organisation-number join keys are no longer needed once the name is attached.
insurer_key_cols = ['insorgnbr','orgnbr']
insurance = insurance.drop(columns=insurer_key_cols).copy()

In [None]:
# Read the silver "acct_prop_ins_link" Delta table into a DataFrame.
TABLE_PATH = src.config.SILVER / "acct_prop_ins_link"
acct_prop_ins_link_delta = DeltaTable(TABLE_PATH)
acct_prop_ins_link = acct_prop_ins_link_delta.to_pandas()

In [None]:
# Display the account/property/insurance link frame for inspection.
acct_prop_ins_link

In [None]:
# Check dtypes and non-null counts of the link frame.
acct_prop_ins_link.info()

In [None]:
# Reduce the link table to unique (property, policy) pairs with a fresh 0..n-1 index.
link_pair_cols = ['propnbr','intrpolicynbr']
acct_prop_ins_link = (
    acct_prop_ins_link.loc[:, link_pair_cols]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# Display the de-duplicated property/policy pairs.
acct_prop_ins_link

In [None]:
# Print the full (untruncated) column summary to choose which columns to keep.
merged_df.info(verbose=True)

In [None]:
# Columns carried forward from the account/property join, in output order.
report_columns = [
    # account
    'acctnbr', 'ownersortname', 'notebal', 'Net Balance', 'noteopenamt', 'orig_ttl_loan_amt',
    # address joined via addrnbr
    'Full_Street_Address', 'cityname', 'statecd', 'zipcd',
    # property
    'propnbr', 'proptypcd', 'proptypdesc',
    'propaddr1', 'propaddr2', 'propaddr3', 'propcity', 'propstate', 'propzip',
    'floodzoneyn', 'aprsvalueamt',
]
merged_df = merged_df.loc[:, report_columns].copy()

In [None]:
# Relabel the address columns as borrower fields.
borrower_labels = {
    'Full_Street_Address': 'Borrower Primary Address',
    'cityname': 'Borrower City',
    'statecd': 'Borrower State',
    'zipcd': 'Borrower Zip',
}
merged_df = merged_df.rename(columns=borrower_labels).copy()

In [None]:
# Left-join the policy numbers linked to each property ...
props_with_policy_links = pd.merge(merged_df, acct_prop_ins_link, how='left', on='propnbr')

# ... then left-join the policy details by policy number.
new_merged_df = pd.merge(props_with_policy_links, insurance, how='left', on='intrpolicynbr')

In [None]:
# Display the account/property/insurance join result.
new_merged_df

In [None]:
# Group name -> list of raw property type descriptions that roll up into it.
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': [
        'Autobody/Gas Station',
        'Gas Station and Convenience St',
        'Auto-Truck Repair',
        'Car Wash',
    ],
    'Retail': [
        'Retail - Big Box Store',
        'Shopping Plaza',
        'Strip Plaza',
        'General Retail',
        'Dealership',
    ],
    'Hospitality': [
        'Hotel/Motel',
        'Hospitality/Event Space',
        'Assisted Living',
    ],
    'Recreation': [
        'Outdoor Recreation',
        'Indoor Recreational',
        'Golf Course',
        'Marina',
    ],
    'Industrial': [
        'Manufacturing',
        'Warehouse',
        'Industrial',
        'Seafood Processing Plant',
        'Solar Farm',
    ],
    'Land': [
        'Land - Unimproved',
        'Land - Improved',
        'Parking Lot',
    ],
    'Mixed Use': [
        'Mixed Use (Retail/Office)',
        'Mixed Use (Retail/Residential)',
        'Mixed Use (Office/Residential)',
    ],
    'Multi Family': [
        'Apartment Building',
        'Multi Family',
    ],
    'General Office': [
        'Office - Professional',
        'Office- General',
    ],
    'Medical Office': [
        'Office - Medical',
    ],
    'Restaurant': [
        'Restaurant',
    ],
    'Residential': [
        '1-4 Fam Res - Non Own Occ',
        '1 Family Residential - Own Occ',
        '2 Family Residential - Own Occ',
        'Condominium',
    ],
    'Storage': [
        'Self Storage',
    ],
    'Educational': [
        'Educational Facilities',
        'Day Care',
    ],
    'Religious': [
        'Church',
    ],
    'Vehicles': [
        'Vehicle - Business',
        'Boat',
    ],
    'Other': [
        'Commercial - Other',
        'Real Estate - Business',
        'Real Estate - Bus&Bus Assets',
        'Real Estate - Personal & Bus',
        'Real Estate - Pers&Bus Assets',
        'All Business Assets',
        'Bus Assets w/Accts Receivable',
        'UCC - ABA',
        'UCC- Equipment',
        'Assignment of Leases/Rents',
        'General Contractor',
        'Outdoor Dealers',
        'Marketable Securities',
        'SBA Loan',
        'Funeral Home',
        'Savings - Partially Secured',
        'Passbook/Savings Secured',
    ],
}



In [None]:
# Invert PROPERTY_TYPE_GROUPS into a flat {property type description: group name} lookup.
proptype_mapping = {}
for group_name, member_descriptions in PROPERTY_TYPE_GROUPS.items():
    proptype_mapping.update(dict.fromkeys(member_descriptions, group_name))

# Label each row with its group; descriptions absent from the lookup
# (including missing values) fall back to 'Other'.
mapped_group = new_merged_df['proptypdesc'].map(proptype_mapping)
new_merged_df['Cleaned Prop Type'] = mapped_group.fillna('Other')

In [None]:
# Print the full (untruncated) column summary of the final frame.
new_merged_df.info(verbose=True)

In [None]:
# Display the final frame, including the 'Cleaned Prop Type' column.
new_merged_df