In [1]:
import os
import sys
from pathlib import Path

# Navigate to project root (equivalent to cd ..)
project_dir = Path(__file__).parent.parent if '__file__' in globals() else Path.cwd().parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [8]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore
import src.built.fetch_data

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    # Coerce aprsvalueamt to numeric for safety
    df['aprsvalueamt'] = pd.to_numeric(df['aprsvalueamt'], errors='coerce')
    
    def get_asset_class(group):
        # Strip whitespace from proptypdesc for matching
        group = group.copy()
        group['proptypdesc'] = group['proptypdesc'].str.strip()
        
        grouped_sum = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        if grouped_sum.empty or grouped_sum.isna().all():
            return None

        asset_type = grouped_sum.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    
    # Create reverse mapping: proptypdesc -> category
    reverse_mapping = {}
    for category, subtypes in mapping_dict.items():
        for subtype in subtypes:
            # Strip whitespace here too for consistency
            reverse_mapping[subtype.strip()] = category
    
    # Map acctnbr to proptypdesc, then to category (with fallback 'Other' for unmapped subtypes)
    df['asset_class'] = (
        df['acctnbr']
        .map(raw_asset_classes)
        .map(lambda x: reverse_mapping.get(x.strip() if pd.notna(x) else None, 'Other') if pd.notna(x) else 'No Data')
    )
    return df

def fetch_cml():
    """
    CML piece of BUILT extract
    """
    acctnbrs = [
        "151038843",
        "151193118",
        "151208305",
        "151167189",
        "151207620",
        "151095041",
        "151068098",
        "151068684",
        "151158766",
        "150443887",
        "150969031",
        "151173897",
    ].copy()

    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to hasan defined acctnbrs for now
    accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
    accts['MACRO TYPE'] = 'Commercial'
    return accts 

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to Resi Construction loans
    # TODO: Add in the holdback logic 
    resi_definite = ["MG01","MG64"]
    accts = accts[accts['currmiaccttypcd'].isin(resi_definite)]

    accts['MACRO TYPE'] = 'Residential'
    return accts

def generate_participation_sold_detail():
    """
    Generates the participation sold detail DataFrame.
    """
    # Get investor data
    invr = src.built.fetch_data.fetch_invr()
    wh_invr = invr['wh_invr'].copy()
    acctgrpinvr = invr['acctgrpinvr'].copy()

    # Load and process base_customer_dim
    base_customer_dim = DeltaTable(src.config.SILVER / "base_customer_dim").to_pandas()
    base_customer_dim = base_customer_dim[['customer_id', 'customer_name']].copy()

    # Type conversions
    wh_invr['acctgrpnbr'] = wh_invr['acctgrpnbr'].astype(str)
    acctgrpinvr['acctgrpnbr'] = acctgrpinvr['acctgrpnbr'].astype(str)

    # Apply orgify
    acctgrpinvr = cdutils.customer_dim.orgify(acctgrpinvr, 'invrorgnbr')

    # Assertions (removed in function for production, but can be added if needed)
    # assert acctgrpinvr['acctgrpnbr'].is_unique, "Dupes"

    # Merges
    merged_investor = wh_invr.merge(acctgrpinvr, on='acctgrpnbr', how='left').merge(
        base_customer_dim, on='customer_id', how='left'
    )

    # Filter for sold status
    merged_investor = merged_investor[merged_investor['invrstatcd'] == 'SOLD'].copy()

    # Drop column
    merged_investor = merged_investor.drop(columns=['datelastmaint']).copy()

    # Rename column
    merged_investor = merged_investor.rename(columns={
        'customer_name': 'Participant Name'
    }).copy()

    # Cast columns
    merged_investor_schema = {
        'acctnbr': 'str'
    }
    merged_investor = cdutils.input_cleansing.cast_columns(merged_investor, merged_investor_schema)

    # Filter to required columns
    merged_investor = merged_investor[[
        'acctnbr',
        'pctowned',
        'Participant Name'
    ]].copy()

    # Convert pctowned to numeric
    merged_investor['pctowned'] = pd.to_numeric(merged_investor['pctowned'])

    return merged_investor

    # return accts

    # Participation data can be separate or in there
    # INVR fields maybe, could just leave off for this cycle


# def generate_built_extract():
    """
    Full built extract
    """
cml = fetch_cml()
resi = fetch_resi()

    # cml = transform(cml)
    # resi = transform(resi)

    # concat_df = pd.concat([cml, resi], ignore_index=True)
    # return concat_df



# def transform(accts):
accts = cml.copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'MACRO TYPE', # CML/Resi
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    'customer_id',
    'ownersortname'
]].copy()

accts = accts.rename(columns={
    'ownersortname':'Primary Borrower Name'
}).copy()

# Append last advance date (lastdisbursdate from wh_loans)
wh_loans = DeltaTable(src.config.BRONZE / "wh_loans").to_pandas()
wh_loans = wh_loans[[
    'acctnbr',
    'lastdisbursdate'
]].copy()
wh_loans_schema = {
    'acctnbr':'str',
}

wh_loans = cdutils.input_cleansing.cast_columns(wh_loans, wh_loans_schema)

accts = accts.merge(wh_loans, on='acctnbr', how='left')

# Participation info
pct_sold_loans = generate_participation_sold_detail()

# Group by acctnbr
grouped_pct_sold_loans = (
    pct_sold_loans
    .groupby('acctnbr')
    .agg(
        Lead_Participant=('Participant Name', 'first'),  # First 'Participant Name' as Lead Participant
        Total_Participants=('Participant Name', 'nunique')  # Number of unique 'Participant Name'
    )
    .reset_index()  # Reset index to keep acctnbr as a column
)

# Merge with accts on acctnbr using left join
accts = accts.merge(grouped_pct_sold_loans, on='acctnbr', how='left')

# Assert that acctnbr is unique in accts
assert accts['acctnbr'].is_unique, "acctnbr is not unique in accts"    

wh_acctuserfields = DeltaTable(src.config.BRONZE / "wh_acctuserfields").to_pandas()
papu = wh_acctuserfields[wh_acctuserfields['acctuserfieldcd'] == 'PAPU'].copy()
parp = wh_acctuserfields[wh_acctuserfields['acctuserfieldcd'] == 'PARP'].copy()

# assert both papu & parp ['acctnbr'].is_unique, "Dupes"

papu_schema = {
    'acctnbr':'str'
}
papu = cdutils.input_cleansing.cast_columns(papu, papu_schema)

parp_schema = {
    'acctnbr':'str'
}
parp = cdutils.input_cleansing.cast_columns(parp, parp_schema)

# Filter down both to just df[['acctnbr','acctuserfieldvalue']]
papu = papu[['acctnbr', 'acctuserfieldvalue']].copy()
parp = parp[['acctnbr', 'acctuserfieldvalue']].copy()

# Name acctuserfieldvalue accordingly
papu = papu.rename(columns={'acctuserfieldvalue': 'totalpctbought'})
parp = parp.rename(columns={'acctuserfieldvalue': 'lead_bank'})

# Left join papu to accts on acctnbr, adding totalpctbought
accts = accts.merge(papu, on='acctnbr', how='left')

# Left join parp to accts on acctnbr, adding lead_bank
accts = accts.merge(parp, on='acctnbr', how='left')   

# Clean totalpctbought: remove '%' if present, convert to numeric, and divide by 100 if > 1 (assuming >1 means percentage like 44.76 for 44.76%, else leave as 0-1)
accts['totalpctbought'] = pd.to_numeric(accts['totalpctbought'].str.replace('%', ''), errors='coerce')
mask_pct = accts['totalpctbought'] > 1
accts.loc[mask_pct, 'totalpctbought'] = accts.loc[mask_pct, 'totalpctbought'] / 100

# Inactive date additional fields for # extensions and orig inactivedate
# TODO

# Controlling person section
# TODO


# Append primary address
customer_address_link = DeltaTable(src.config.SILVER / "customer_address_link").to_pandas()
customer_address_link = customer_address_link[customer_address_link['addrusecd'] == 'PRI'].copy()
customer_address_link = customer_address_link[[
    'customer_id',
    'addrnbr'
]].copy()
customer_address_link_schema = {
    'addrnbr':'str'
}
customer_address_link = cdutils.input_cleansing.cast_columns(customer_address_link, customer_address_link_schema)

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)
address = address.drop(columns=['load_timestamp_utc']).copy()
address = customer_address_link.merge(address, how='inner', on='addrnbr')
address = address.drop(columns=['addrnbr']).copy()

address = address.rename(columns={
    'Full_Street_Address':'Primary Borrower Address',
    'cityname':'Primary Borrower City',
    'statecd':'Primary Borrower State',
    'zipcd':'Primary Borrower Zip',
}).copy()
accts = accts.merge(address, how='left', on='customer_id')

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
address = address.rename(columns={
    'Full_Street_Address':'Property Address',
    'cityname':'Property City',
    'statecd':'Property State',
    'zipcd':'Primary Zip',
}).copy()

accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)
accts = accts[~(accts['addrnbr'].isnull())].copy()



In [9]:
accts

Unnamed: 0,effdate,acctnbr,MACRO TYPE,creditlimitamt,loanlimityn,notebal,Net Balance,origdate,datemat,inactivedate,...,proptypdesc,addrnbr,owneroccupiedcd,owneroccupieddesc,nbrofunits,Property Address,Property City,Property State,Primary Zip,asset_class
0,2025-10-09,151038843,Commercial,27500000.0,Y,9480000.0,6722181.79,2024-04-24 11:06:50,2044-04-22,2026-10-22,...,Apartment Building,1406663,5.0,Non-Owner Occupied,28.0,153 GANO ST,PROVIDENCE,RI,2906,Multi Family
1,2025-10-09,151038843,Commercial,27500000.0,Y,9480000.0,6722181.79,2024-04-24 11:06:50,2044-04-22,2026-10-22,...,Apartment Building,1406664,5.0,Non-Owner Occupied,35.0,157 GANO ST,PROVIDENCE,RI,2906,Multi Family
2,2025-10-09,151038843,Commercial,27500000.0,Y,9480000.0,6722181.79,2024-04-24 11:06:50,2044-04-22,2026-10-22,...,Apartment Building,1406665,5.0,Non-Owner Occupied,35.0,161 GANO ST,PROVIDENCE,RI,2906,Multi Family
3,2025-10-09,151038843,Commercial,27500000.0,Y,9480000.0,6722181.79,2024-04-24 11:06:50,2044-04-22,2026-10-22,...,Apartment Building,1406666,5.0,Non-Owner Occupied,35.0,165 GANO ST,PROVIDENCE,RI,2906,Multi Family
6,2025-10-09,151193118,Commercial,14700000.0,Y,0.0,0.0,2025-06-17 10:59:07,2035-06-16,2027-06-16,...,Apartment Building,1435211,,,70.0,300 BOURNE AVENUE,RUMFORD,RI,2916,Multi Family
9,2025-10-09,151208305,Commercial,13500000.0,Y,4306624.0,4306624.0,2025-07-31 10:46:11,2035-07-30,2027-07-30,...,Shopping Plaza,1438418,,,3.0,1500 DIAMOND HILL ROAD,WOONSOCKET,RI,2895,Retail
13,2025-10-09,151207620,Commercial,12000000.0,Y,1270662.19,1270662.19,2025-07-29 13:49:37,2032-03-09,2027-07-28,...,Apartment Building,1350681,5.0,Non-Owner Occupied,,30 MARTIN ST,CUMBERLAND,RI,2864,Multi Family
14,2025-10-09,151207620,Commercial,12000000.0,Y,1270662.19,1270662.19,2025-07-29 13:49:37,2032-03-09,2027-07-28,...,Apartment Building,1438282,,,76.0,30 MARTIN STREET,CUMBERLAND,RI,2864,Multi Family
18,2025-10-09,151095041,Commercial,10000000.0,Y,5852803.21,5852803.21,2024-08-27 15:27:55,2034-08-20,2026-02-20,...,Self Storage,1417043,,,2.0,10 DAVKIM LANE,NANTUCKET,MA,2554,Storage
22,2025-10-09,151068098,Commercial,6891985.0,Y,3412118.71,3412118.71,2024-06-26 14:06:34,2026-06-28,2026-06-28,...,Commercial - Other,1412462,,,1.0,680 PURCHASE STREET UNIT 1,NEW BEDFORD,MA,2740,Other


In [5]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore
import src.built.fetch_data
import cdutils.customer_dim

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    # Coerce aprsvalueamt to numeric for safety
    df['aprsvalueamt'] = pd.to_numeric(df['aprsvalueamt'], errors='coerce')
    
    def get_asset_class(group):
        # Strip whitespace from proptypdesc for matching
        group = group.copy()
        group['proptypdesc'] = group['proptypdesc'].str.strip()
        
        grouped_sum = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        if grouped_sum.empty or grouped_sum.isna().all():
            return None

        asset_type = grouped_sum.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    
    # Create reverse mapping: proptypdesc -> category
    reverse_mapping = {}
    for category, subtypes in mapping_dict.items():
        for subtype in subtypes:
            # Strip whitespace here too for consistency
            reverse_mapping[subtype.strip()] = category
    
    # Map acctnbr to proptypdesc, then to category (with fallback 'Other' for unmapped subtypes)
    df['asset_class'] = (
        df['acctnbr']
        .map(raw_asset_classes)
        .map(lambda x: reverse_mapping.get(x.strip() if pd.notna(x) else None, 'Other') if pd.notna(x) else 'No Data')
    )
    return df

def fetch_cml():
    """
    CML piece of BUILT extract
    """
    acctnbrs = [
        "151038843",
        "151193118",
        "151208305",
        "151167189",
        "151207620",
        "151095041",
        "151068098",
        "151068684",
        "151158766",
        "150443887",
        "150969031",
        "151173897",
    ].copy()

    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to hasan defined acctnbrs for now
    accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
    accts['MACRO TYPE'] = 'Commercial'
    return accts 

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to Resi Construction loans
    # TODO: Add in the holdback logic 
    resi_definite = ["MG01","MG64"]
    accts = accts[accts['currmiaccttypcd'].isin(resi_definite)]

    accts['MACRO TYPE'] = 'Residential'
    return accts

def generate_participation_sold_detail():
    """
    Generates the participation sold detail DataFrame.
    """
    # Get investor data
    invr = src.built.fetch_data.fetch_invr()
    wh_invr = invr['wh_invr'].copy()
    acctgrpinvr = invr['acctgrpinvr'].copy()

    # Load and process base_customer_dim
    base_customer_dim = DeltaTable(src.config.SILVER / "base_customer_dim").to_pandas()
    base_customer_dim = base_customer_dim[['customer_id', 'customer_name']].copy()

    # Type conversions
    wh_invr['acctgrpnbr'] = wh_invr['acctgrpnbr'].astype(str)
    acctgrpinvr['acctgrpnbr'] = acctgrpinvr['acctgrpnbr'].astype(str)

    # Apply orgify
    acctgrpinvr = cdutils.customer_dim.orgify(acctgrpinvr, 'invrorgnbr')

    # Assertions (removed in function for production, but can be added if needed)
    # assert acctgrpinvr['acctgrpnbr'].is_unique, "Dupes"

    # Merges
    merged_investor = wh_invr.merge(acctgrpinvr, on='acctgrpnbr', how='left').merge(
        base_customer_dim, on='customer_id', how='left'
    )

    # Filter for sold status
    merged_investor = merged_investor[merged_investor['invrstatcd'] == 'SOLD'].copy()

    # Drop column
    merged_investor = merged_investor.drop(columns=['datelastmaint']).copy()

    # Rename column
    merged_investor = merged_investor.rename(columns={
        'customer_name': 'Participant Name'
    }).copy()

    # Cast columns
    merged_investor_schema = {
        'acctnbr': 'str'
    }
    merged_investor = cdutils.input_cleansing.cast_columns(merged_investor, merged_investor_schema)

    # Filter to required columns
    merged_investor = merged_investor[[
        'acctnbr',
        'pctowned',
        'Participant Name'
    ]].copy()

    # Convert pctowned to numeric
    merged_investor['pctowned'] = pd.to_numeric(merged_investor['pctowned'])

    return merged_investor



    # Participation data can be separate or in there
    # INVR fields maybe, could just leave off for this cycle


# def generate_built_extract():
    """
    Full built extract
    """
cml = fetch_cml()
resi = fetch_resi()

    # cml = transform(cml)
    # resi = transform(resi)

    # concat_df = pd.concat([cml, resi], ignore_index=True)
    # return concat_df


accts = cml.copy()
# def transform(accts):
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'MACRO TYPE', # CML/Resi
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    'customer_id',
    'ownersortname'
]].copy()

accts = accts.rename(columns={
    'ownersortname':'Primary Borrower Name'
}).copy()

# Append last advance date (lastdisbursdate from wh_loans)
wh_loans = DeltaTable(src.config.BRONZE / "wh_loans").to_pandas()
wh_loans = wh_loans[[
    'acctnbr',
    'lastdisbursdate'
]].copy()
wh_loans_schema = {
    'acctnbr':'str',
}

wh_loans = cdutils.input_cleansing.cast_columns(wh_loans, wh_loans_schema)

accts = accts.merge(wh_loans, on='acctnbr', how='left')

# Participation info
pct_sold_loans = generate_participation_sold_detail()

# Group by acctnbr
grouped_pct_sold_loans = (
    pct_sold_loans
    .groupby('acctnbr')
    .agg(
        Lead_Participant=('Participant Name', 'first'),  # First 'Participant Name' as Lead Participant
        Total_Participants=('Participant Name', 'nunique')  # Number of unique 'Participant Name'
    )
    .reset_index()  # Reset index to keep acctnbr as a column
)

# Merge with accts on acctnbr using left join
accts = accts.merge(grouped_pct_sold_loans, on='acctnbr', how='left')

# Assert that acctnbr is unique in accts
assert accts['acctnbr'].is_unique, "acctnbr is not unique in accts"    

wh_acctuserfields = DeltaTable(src.config.BRONZE / "wh_acctuserfields").to_pandas()
papu = wh_acctuserfields[wh_acctuserfields['acctuserfieldcd'] == 'PAPU'].copy()
parp = wh_acctuserfields[wh_acctuserfields['acctuserfieldcd'] == 'PARP'].copy()

# assert both papu & parp ['acctnbr'].is_unique, "Dupes"

papu_schema = {
    'acctnbr':'str'
}
papu = cdutils.input_cleansing.cast_columns(papu, papu_schema)

parp_schema = {
    'acctnbr':'str'
}
parp = cdutils.input_cleansing.cast_columns(parp, parp_schema)

# Filter down both to just df[['acctnbr','acctuserfieldvalue']]
papu = papu[['acctnbr', 'acctuserfieldvalue']].copy()
parp = parp[['acctnbr', 'acctuserfieldvalue']].copy()

# Name acctuserfieldvalue accordingly
papu = papu.rename(columns={'acctuserfieldvalue': 'totalpctbought'})
parp = parp.rename(columns={'acctuserfieldvalue': 'lead_bank'})

# Left join papu to accts on acctnbr, adding totalpctbought
accts = accts.merge(papu, on='acctnbr', how='left')

# Left join parp to accts on acctnbr, adding lead_bank
accts = accts.merge(parp, on='acctnbr', how='left')   


In [6]:
accts

Unnamed: 0,effdate,acctnbr,MACRO TYPE,creditlimitamt,loanlimityn,notebal,Net Balance,origdate,datemat,inactivedate,...,mjaccttypcd,currmiaccttypcd,product,customer_id,Primary Borrower Name,lastdisbursdate,Lead_Participant,Total_Participants,totalpctbought,lead_bank
0,2025-10-09,151038843,Commercial,27500000.0,Y,9480000.0,6722181.79,2024-04-24 11:06:50,2044-04-22,2026-10-22,...,CML,CM08,CML ARM Construction,O1012385,"POWER 250, LLC",2025-09-10,BLUESTONE BANK,1.0,,
1,2025-10-09,151193118,Commercial,14700000.0,Y,0.0,0.0,2025-06-17 10:59:07,2035-06-16,2027-06-16,...,CML,CM08,CML ARM Construction,O1018391,NOBLE APARTMENTS LLC,NaT,,,,
2,2025-10-09,151208305,Commercial,13500000.0,Y,4306624.0,4306624.0,2025-07-31 10:46:11,2035-07-30,2027-07-30,...,CML,CM08,CML ARM Construction,O1018558,"WHP INVESTMENTS, LLC",2025-07-31,,,,
3,2025-10-09,151207620,Commercial,12000000.0,Y,1270662.19,1270662.19,2025-07-29 13:49:37,2032-03-09,2027-07-28,...,CML,CM08,CML ARM Construction,O1001419,BERKELEY BUSINESS CENTER LLC,2025-07-29,,,,
4,2025-10-09,151095041,Commercial,10000000.0,Y,5852803.21,5852803.21,2024-08-27 15:27:55,2034-08-20,2026-02-20,...,CML,CM08,CML ARM Construction,O1017220,"DXD SS FUND II NANTUCKET PROPCO, LLC",2025-10-07,,,44.763,Centreville Bank
5,2025-10-09,151068098,Commercial,6891985.0,Y,3412118.71,3412118.71,2024-06-26 14:06:34,2026-06-28,2026-06-28,...,CML,CM08,CML ARM Construction,O1016903,ZEITERION REALTY LLC,2025-10-08,,,,
6,2025-10-09,151167189,Commercial,12833000.0,Y,3852478.9,1926239.46,2025-04-11 13:18:09,2045-04-04,2028-04-04,...,CML,CM08,CML ARM Construction,O1002433,DARLING DEVELOPMENT CORPORATION,2025-09-24,THE COOPERATIVE BANK OF CAPE COD,1.0,,
7,2025-10-09,151068684,Commercial,5808015.0,Y,0.0,0.0,2024-06-28 08:44:43,2026-12-28,2026-12-28,...,CML,CM08,CML ARM Construction,O1004271,ZEITERION THEATRE INC,NaT,,,,
8,2025-10-09,150443887,Commercial,5102000.0,Y,2641214.19,2641214.19,2020-03-12 11:43:57,2026-08-11,2026-08-11,...,CML,CM08,CML ARM Construction,O1008851,"SHOPS AT EMERALD, LLC",2022-12-15,,,32.81%,Bluestone bank
9,2025-10-09,150969031,Commercial,3500000.0,Y,1348576.22,1348576.22,2023-11-09 15:54:59,2035-05-09,2026-05-09,...,CML,CM08,CML ARM Construction,O1016232,AMALGAMATED FINANCIAL GROUP IV,2025-09-24,,,32.71028%,Bluestone Bank


In [None]:


# Inactive date additional fields for # extensions and orig inactivedate
# TODO

# Controlling person section
# TODO


# Append primary address
customer_address_link = DeltaTable(src.config.SILVER / "customer_address_link").to_pandas()
customer_address_link = customer_address_link[customer_address_link['addrusecd'] == 'PRI'].copy()
customer_address_link = customer_address_link[[
    'customer_id',
    'addrnbr'
]].copy()
customer_address_link_schema = {
    'addrnbr':'str'
}
customer_address_link = cdutils.input_cleansing.cast_columns(customer_address_link, customer_address_link_schema)

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)
address = address.drop(columns=['load_timestamp_utc']).copy()
address = customer_address_link.merge(address, how='inner', on='addrnbr')
address = address.drop(columns=['addrnbr']).copy()

address = address.rename(columns={
    'Full_Street_Address':'Primary Borrower Address',
    'cityname':'Primary Borrower City',
    'statecd':'Primary Borrower State',
    'zipcd':'Primary Borrower Zip',
}).copy()
accts = accts.merge(address, how='left', on='customer_id')

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
address = address.rename(columns={
    'Full_Street_Address':'Property Address',
    'cityname':'Property City',
    'statecd':'Property State',
    'zipcd':'Primary Zip',
}).copy()

accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)
accts = accts[~(accts['addrnbr'].isnull())].copy()

#

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    # Coerce aprsvalueamt to numeric for safety
    df['aprsvalueamt'] = pd.to_numeric(df['aprsvalueamt'], errors='coerce')
    
    def get_asset_class(group):
        # Strip whitespace from proptypdesc for matching
        group = group.copy()
        group['proptypdesc'] = group['proptypdesc'].str.strip()
        
        grouped_sum = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        if grouped_sum.empty or grouped_sum.isna().all():
            return None

        asset_type = grouped_sum.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    
    # Create reverse mapping: proptypdesc -> category
    reverse_mapping = {}
    for category, subtypes in mapping_dict.items():
        for subtype in subtypes:
            # Strip whitespace here too for consistency
            reverse_mapping[subtype.strip()] = category
    
    # Map acctnbr to proptypdesc, then to category (with fallback 'Other' for unmapped subtypes)
    df['asset_class'] = (
        df['acctnbr']
        .map(raw_asset_classes)
        .map(lambda x: reverse_mapping.get(x.strip() if pd.notna(x) else None, 'Other') if pd.notna(x) else 'No Data')
    )
    return df

def fetch_cml():
    """
    CML piece of BUILT extract
    """
    acctnbrs = [
        "151038843",
        "151193118",
        "151208305",
        "151167189",
        "151207620",
        "151095041",
        "151068098",
        "151068684",
        "151158766",
        "150443887",
        "150969031",
        "151173897",
    ].copy()

    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to hasan defined acctnbrs for now
    accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
    accts['MACRO TYPE'] = 'Commercial'
    return accts 

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to Resi Construction loans
    # TODO: Add in the holdback logic 
    resi_definite = ["MG01","MG64"]
    accts = accts[accts['currmiaccttypcd'].isin(resi_definite)]

    accts['MACRO TYPE'] = 'Residential'
    return accts

    # Participation data can be separate or in there
    # INVR fields maybe, could just leave off for this cycle


# def generate_built_extract():
"""
Full built extract
"""
cml = fetch_cml()
resi = fetch_resi()

# # cml = transform(cml)
# # resi = transform(resi)

# concat_df = pd.concat([cml, resi], ignore_index=True)
# return concat_df





In [None]:
accts = cml.copy()

In [None]:
accts

In [None]:
# def transform(accts):
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'MACRO TYPE', # CML/Resi
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    'availbalamt', # Available amount total
    'Net Available', # BCSB Portion of available
    'credlimitclatresamt', # Collateral reserve amount
    'Net Collateral Reserve', # BCSB portion of collateral reserve
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    'customer_id',
    'ownersortname'
]].copy()

accts = accts.rename(columns={
    'ownersortname':'Primary Borrower Name'
}).copy()

In [None]:


# Add in last advance rate
wh_loans = DeltaTable(src.config.BRONZE / "wh_loans").to_pandas()
wh_loans

In [None]:
wh_loans = DeltaTable(src.config.BRONZE / "wh_loans").to_pandas()
wh_loans = wh_loans[[
    'acctnbr',
    'lastdisbursdate'
]].copy()
wh_loans_schema = {
    'acctnbr':'str',
}

wh_loans = cdutils.input_cleansing.cast_columns(wh_loans, wh_loans_schema)

accts = accts.merge(wh_loans, on='acctnbr', how='left')

In [None]:

# Append primary address
customer_address_link = DeltaTable(src.config.SILVER / "customer_address_link").to_pandas()
customer_address_link = customer_address_link[customer_address_link['addrusecd'] == 'PRI'].copy()
customer_address_link = customer_address_link[[
    'customer_id',
    'addrnbr'
]].copy()
customer_address_link_schema = {
    'addrnbr':'str'
}
customer_address_link = cdutils.input_cleansing.cast_columns(customer_address_link, customer_address_link_schema)

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)
address = address.drop(columns=['load_timestamp_utc']).copy()
address = customer_address_link.merge(address, how='inner', on='addrnbr')
address = address.drop(columns=['addrnbr']).copy()

address = address.rename(columns={
    'Full_Street_Address':'Primary Borrower Address',
    'cityname':'Primary Borrower City',
    'statecd':'Primary Borrower State',
    'zipcd':'Primary Borrower Zip',
}).copy()
accts = accts.merge(address, how='left', on='customer_id')

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
address = address.rename(columns={
    'Full_Street_Address':'Property Address',
    'cityname':'Property City',
    'statecd':'Property State',
    'zipcd':'Primary Zip',
}).copy()

accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)
accts = accts[~(accts['addrnbr'].isnull())].copy()

# return accts



In [None]:
accts

In [None]:
    # Filter to hasan defined acctnbrs for now
    accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
    accts = accts[[
        'effdate', # Effective date of data
        'acctnbr', # Loan Number
        'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
        'loanlimityn', # LOC Type (Y/N)
        'notebal', # Draw Funded to Date
        'Net Balance', # BCSB Net Balance
        # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
        'origdate', # Date loan hit core system (Close Date)
        'datemat', # Maturity Date (full loan)
        'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
        # Create calculated field for term (Months) between inactivedate and origdate
        'noteintrate', # Interest Rate (Current)
        'mjaccttypcd', # Major code
        'currmiaccttypcd', # Minor code (1:1 match with product)
        'product', # Product Type
        # Asset class, calculated from proptypdesc mode with appraised values
        # All prop date requested
        # Appraisal info
        # Owner occ
        # Borrower info
        # They want controlling person for each org I believe

    ]].copy()

    accts_schema = {
        'acctnbr':'str'
    }
    accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

    acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

    acct_prop_link_schema = {
        'acctnbr':'str',
        'propnbr':'str'
    }

    acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
    acct_prop_link = acct_prop_link[[
        'acctnbr',
        'propnbr'
    ]].copy()

    # Property
    property = DeltaTable(src.config.SILVER / "property").to_pandas()
    prop_schema = {
        'propnbr':'str',
        'addrnbr':'str'
    }

    property = cdutils.input_cleansing.cast_columns(property, prop_schema)

    # Filter down to applicable columns
    property = property[[
        'propnbr',
        'aprsvalueamt',
        'aprsdate',
        'proptypdesc',
        'addrnbr',
        'owneroccupiedcd',
        'owneroccupieddesc',
        'nbrofunits',
    ]].copy()

    # Merge
    accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
    accts = accts.merge(property, on='propnbr', how='left')

    address = DeltaTable(src.config.SILVER / "address").to_pandas()
    address_schema = {
        'addrnbr':'str'
    }
    address = cdutils.input_cleansing.cast_columns(address, address_schema)

    address = address.drop(columns='load_timestamp_utc').copy()
    accts = accts.merge(address, on='addrnbr', how='left')

    # Append asset class
    # Property type grouping configuration
    PROPERTY_TYPE_GROUPS = {
        'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
        'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
        'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
        'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
        'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
        'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
        'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
        'Multi Family': ['Apartment Building','Multi Family'],
        'General Office': ['Office - Professional','Office- General'],
        'Medical Office': ['Office - Medical'],
        'Restaurant': ['Restaurant'],
        'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
        'Storage': ['Self Storage'],
        'Educational': ['Educational Facilities','Day Care'],
        'Religious': ['Church'],
        'Vehicles': ['Vehicle - Business','Boat'],
        'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
    }
    accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)
    accts = accts[~(accts['addrnbr'].isnull())].copy()

    # Participation data can be separate or in there
    # INVR fields maybe, could just leave off for this cycle
    accts['MACRO TYPE'] = 'Commercial'
    return accts 

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to Resi Construction loans
    # TODO: Add in the holdback logic 
    resi_definite = ["MG01","MG64"]
    accts = accts[accts['currmiaccttypcd'].isin(resi_definite)]

    accts = accts[[
        'effdate', # Effective date of data
        'acctnbr', # Loan Number
        'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
        'loanlimityn', # LOC Type (Y/N)
        'notebal', # Draw Funded to Date
        'Net Balance', # BCSB Net Balance
        # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
        'origdate', # Date loan hit core system (Close Date)
        'datemat', # Maturity Date (full loan)
        'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
        # Create calculated field for term (Months) between inactivedate and origdate
        'noteintrate', # Interest Rate (Current)
        'mjaccttypcd', # Major code
        'currmiaccttypcd', # Minor code (1:1 match with product)
        'product', # Product Type
        # Asset class, calculated from proptypdesc mode with appraised values
        # All prop date requested
        # Appraisal info
        # Owner occ
        # Borrower info
        # They want controlling person for each org I believe

    ]].copy()

    accts_schema = {
        'acctnbr':'str'
    }
    accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

    acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

    acct_prop_link_schema = {
        'acctnbr':'str',
        'propnbr':'str'
    }

    acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
    acct_prop_link = acct_prop_link[[
        'acctnbr',
        'propnbr'
    ]].copy()

    # Property
    property = DeltaTable(src.config.SILVER / "property").to_pandas()
    prop_schema = {
        'propnbr':'str',
        'addrnbr':'str'
    }

    property = cdutils.input_cleansing.cast_columns(property, prop_schema)

    # Filter down to applicable columns
    property = property[[
        'propnbr',
        'aprsvalueamt',
        'aprsdate',
        'proptypdesc',
        'addrnbr',
        'owneroccupiedcd',
        'owneroccupieddesc',
        'nbrofunits',
    ]].copy()

    # Merge
    accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
    accts = accts.merge(property, on='propnbr', how='left')

    address = DeltaTable(src.config.SILVER / "address").to_pandas()
    address_schema = {
        'addrnbr':'str'
    }
    address = cdutils.input_cleansing.cast_columns(address, address_schema)

    address = address.drop(columns='load_timestamp_utc').copy()
    accts = accts.merge(address, on='addrnbr', how='left')

    # Append asset class
    # Property type grouping configuration
    PROPERTY_TYPE_GROUPS = {
        'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
        'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
        'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
        'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
        'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
        'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
        'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
        'Multi Family': ['Apartment Building','Multi Family'],
        'General Office': ['Office - Professional','Office- General'],
        'Medical Office': ['Office - Medical'],
        'Restaurant': ['Restaurant'],
        'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
        'Storage': ['Self Storage'],
        'Educational': ['Educational Facilities','Day Care'],
        'Religious': ['Church'],
        'Vehicles': ['Vehicle - Business','Boat'],
        'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
    }
    accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)
    accts = accts[~(accts['addrnbr'].isnull())].copy()
    accts['MACRO TYPE'] = 'Residential'
    return accts



# def generate_built_extract():
    """
    Full built extract
    """
cml = fetch_cml()
resi = fetch_resi()




In [None]:
 # Pull in Base Customer Layer
base_customer_dim = DeltaTable(src.config.SILVER / "base_customer_dim").to_pandas()

In [None]:
# Inner join with base customer dim
base_customer_dim = base_customer_dim[[
    'customer_id',
    'customer_type',
    'customer_name',
]].copy()
accts = accts.merge(base_customer_dim, on='customer_id', how='left')



In [None]:
accts

In [None]:

# Append primary address
customer_address_link = DeltaTable(src.config.SILVER / "customer_address_link").to_pandas()
customer_address_link = customer_address_link[customer_address_link['addrusecd'] == 'PRI'].copy()
customer_address_link = customer_address_link[[
    'customer_id',
    'addrnbr'
]].copy()
customer_address_link_schema = {
    'addrnbr':'str'
}
customer_address_link = cdutils.input_cleansing.cast_columns(customer_address_link, customer_address_link_schema)

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)
address = address.drop(columns=['load_timestamp_utc']).copy()
address = customer_address_link.merge(address, how='inner', on='addrnbr')
address = address.drop(columns=['addrnbr']).copy()

address = address.rename(columns={
    'Full_Street_Address':'Primary Borrower Address',
    'cityname':'Primary Borrower City',
    'statcd':'Primary Borrower State',
    'zipcd':'Primary Borrower Zip'
}).copy()



In [None]:
address

In [None]:
resi

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    # Coerce aprsvalueamt to numeric for safety
    df['aprsvalueamt'] = pd.to_numeric(df['aprsvalueamt'], errors='coerce')
    
    def get_asset_class(group):
        # Strip whitespace from proptypdesc for matching
        group = group.copy()
        group['proptypdesc'] = group['proptypdesc'].str.strip()
        
        grouped_sum = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        if grouped_sum.empty or grouped_sum.isna().all():
            return None

        asset_type = grouped_sum.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    
    # Create reverse mapping: proptypdesc -> category
    reverse_mapping = {}
    for category, subtypes in mapping_dict.items():
        for subtype in subtypes:
            # Strip whitespace here too for consistency
            reverse_mapping[subtype.strip()] = category
    
    # Map acctnbr to proptypdesc, then to category (with fallback 'Other' for unmapped subtypes)
    df['asset_class'] = (
        df['acctnbr']
        .map(raw_asset_classes)
        .map(lambda x: reverse_mapping.get(x.strip() if pd.notna(x) else None, 'Other') if pd.notna(x) else 'No Data')
    )
    return df

# def fetch_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()



In [None]:
accts

In [None]:
# Filter to hasan defined acctnbrs for now
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Filter out where no address (collateral that is not real estate)
accts = accts[~(accts['addrnbr'].isnull())].copy()

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
# """
# Resi piece of BUILT extract
# """
# # TODO: Implement Chris logic 
# pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
# """
# Full built extract
# """
# cml = fetch_cml()
# # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    def get_asset_class(group):
        grouped_sum = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        if grouped_sum.empty or grouped_sum.isna().all():
            return None

        asset_type = grouped_sum.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    df['asset_class'] = df['acctnbr'].map(raw_asset_classes).map(lambda x: mapping_dict.get(x, 'Other') if pd.notna(x) else 'No Data')
    return df

# def fetch_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Filter to hasan defined acctnbrs for now
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
# """
# Resi piece of BUILT extract
# """
# # TODO: Implement Chris logic 
# pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
# """
# Full built extract
# """
# cml = fetch_cml()
# # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    def get_asset_class(group):
        summed = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        asset_type = summed.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class, include_groups=False).to_dict()
    df['asset_class'] = df['acctnbr'].map(raw_asset_classes).map(mapping_dict)
    return df

# def fetch_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Filter to hasan defined acctnbrs for now
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
# """
# Resi piece of BUILT extract
# """
# # TODO: Implement Chris logic 
# pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
# """
# Full built extract
# """
# cml = fetch_cml()
# # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Appends a new field 'asset_class' to df based on highest appraised values by property type
    """
    def get_asset_class(group):
        summed = group.groupby('proptypdesc')['aprsvalueamt'].sum()
        asset_type = summed.idxmax()
        return asset_type
    
    raw_asset_classes = df.groupby('acctnbr').apply(get_asset_class).to_dict()
    df['asset_class'] = df['acctnbr'].map(raw_asset_classes).map(mapping_dict)
    return df

# def fetch_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Filter to hasan defined acctnbrs for now
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
#     """
#     Resi piece of BUILT extract
#     """
#     # TODO: Implement Chris logic 
#     pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
#     """
#     Full built extract
#     """
#     cml = fetch_cml()
#     # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

# def fetch_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Filter to hasan defined acctnbrs for now
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe
]].copy()



# Participation data can be separate or in there
# INVR fields maybe

# Make sure acctnbr field is str datatype


    # return accts 


In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

def fetch_cml():
    """
    CML piece of BUILT extract
    """
    acctnbrs = [
        "151038843",
        "151193118",
        "151208305",
        "151167189",
        "151207620",
        "151095041",
        "151068098",
        "151068684",
        "151158766",
        "150443887",
        "150969031",
        "151173897",
    ].copy()

    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # Filter to hasan defined acctnbrs for now
    accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
    return accts 

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    # TODO: Implement Chris logic 
    pass

def transform(df):
    """
    Core logic/transformations/filtering for BUILT extract
    
    Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
    """

    df = df[[
        'effdate', # Effective date of data
        'acctnbr', # Loan Number
        'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
        'loanlimityn', # LOC Type (Y/N)
        'notebal', # Draw Funded to Date
        'Net Balance', # BCSB Net Balance
        # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
        'origdate', # Date loan hit core system (Close Date)
        'datemat', # Maturity Date (full loan)
        'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
        # Create calculated field for term (Months) between inactivedate and origdate
        'noteintrate', # Interest Rate (Current)
        'mjaccttypcd', # Major code
        'currmiaccttypcd', # Minor code (1:1 match with product)
        'product', # Product Type
        # Asset class, calculated from proptypdesc mode with appraised values
        # All prop date requested
        # Appraisal info
        # Owner occ
        # Borrower info
        # They want controlling person for each org I believe
    ]].copy()

    # Participation data can be separate or in there
    # INVR fields maybe

    # Make sure acctnbr field is str datatype
    pass

# def generate_built_extract():
"""
Full built extract
"""
cml = fetch_cml()
# resi = fetch_resi()

# cml = transform(cml)
df = cml.copy()

df = df[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe
]].copy()

# Participation data can be separate or in there
# INVR fields maybe

# Make sure acctnbr field is str datatype


In [None]:
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

In [None]:
acct_prop_link

In [None]:
property = DeltaTable(src.config.SILVER / "property").to_pandas()

In [None]:
property

In [None]:
import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime
from typing import Optional

# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating

def fetch_invr():
    """
    Main data query
    """
    
    wh_invr = text(f"""
    SELECT
        a.ACCTNBR,
        a.ACCTGRPNBR,
        a.INVRSTATCD,
        a.PCTOWNED,
        a.ORIGINVRRATE,
        a.CURRINVRRATE,
        a.DATELASTMAINT
    FROM
        OSIBANK.WH_INVR a
    """)
    
    acctgrpinvr = text(f"""
    SELECT
        a.ACCTGRPNBR,
        a.INVRORGNBR
    FROM
        OSIBANK.ACCTGRPINVR a
    """)

    queries = [
        {'key':'wh_invr', 'sql':wh_invr, 'engine':1},
        {'key':'acctgrpinvr', 'sql':acctgrpinvr, 'engine':1},
    ]

    data = cdutils.database.connect.retrieve_data(queries)
    return data


In [None]:
import cdutils.deduplication

In [None]:
# Get investor data
invr = fetch_invr()
wh_invr = invr['wh_invr'].copy()


acctgrpinvr = invr['acctgrpinvr'].copy()

wh_org = DeltaTable(src.config.BRONZE / "wh_org").to_pandas()
wh_org = wh_org[[
    'orgnbr',
    'orgname'
]].copy()
dedupe_list = [
    {'df':wh_org, 'field':'orgnbr'}
]
wh_org = cdutils.deduplication.dedupe(dedupe_list).copy()
wh_org['orgnbr'] = wh_org['orgnbr'].astype(str)
wh_invr['acctgrpnbr'] = wh_invr['acctgrpnbr'].astype(str)
acctgrpinvr['acctgrpnbr'] = acctgrpinvr['acctgrpnbr'].astype(str)
acctgrpinvr['invrorgnbr'] = acctgrpinvr['invrorgnbr'].astype(str)

merged_investor = wh_invr.merge(acctgrpinvr, on='acctgrpnbr', how='left').merge(wh_org, left_on='invrorgnbr', right_on='orgnbr')
merged_investor = merged_investor.sort_values(by='pctowned', ascending=False).copy()
dedupe_list = [
    {'df':merged_investor, 'field':'acctnbr'}
]
merged_investor = cdutils.deduplication.dedupe(dedupe_list).copy()
merged_investor = merged_investor.drop(columns=['orgnbr','invrorgnbr','pctowned','acctgrpnbr']).copy()
merged_investor['acctnbr'] = merged_investor['acctnbr'].astype(str)
assert merged_investor['acctnbr'].is_unique, "Duplicates exist. Pre-merge of investor data to full df"


In [None]:
merged_investor

In [None]:
wh_invr

In [None]:
acctgrpinvr

In [None]:

def fetch_resi():
    """
    Resi piece of BUILT extract
    """
    # TODO: Implement Chris logic 
    pass

# def transform(df):
#     """
#     Core logic/transformations/filtering for BUILT extract
    
#     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
#     """

#     df = df[[
#         'effdate', # Effective date of data
#         'acctnbr', # Loan Number
#         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
#         'loanlimityn', # LOC Type (Y/N)
#         'notebal', # Draw Funded to Date
#         'Net Balance', # BCSB Net Balance
#         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
#         'origdate', # Date loan hit core system (Close Date)
#         'datemat', # Maturity Date (full loan)
#         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
#         # Create calculated field for term (Months) between inactivedate and origdate
#         'noteintrate', # Interest Rate (Current)
#         'mjaccttypcd', # Major code
#         'currmiaccttypcd', # Minor code (1:1 match with product)
#         'product', # Product Type
#         # Asset class, calculated from proptypdesc mode with appraised values
#         # All prop date requested
#         # Appraisal info
#         # Owner occ
#         # Borrower info
#         # They want controlling person for each org I believe
#     ]].copy()



#     # Participation data can be separate or in there
#     # INVR fields maybe

#     # Make sure acctnbr field is str datatype


def generate_built_extract():
    """
    Full built extract
    """
    cml = fetch_cml()
    # resi = fetch_resi()




In [None]:
df

In [None]:
prop = DeltaTable(src.config.SILVER / "property").to_pandas()

In [None]:
prop

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

# def generate_cml():
"""
CML piece of BUILT extract
"""
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
].copy()

accts = DeltaTable(src.config.SILVER / "account").to_pandas()

accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()




In [None]:
accts

In [None]:
accts.info()

In [None]:
def generate_resi():
    """
    Resi piece of BUILT extract
    """
    pass

def generate_built_extract():
    """
    Full built extract
    """
    cml = generate_cml()
    resi = generate_resi()

