In [None]:
import os
import sys
from pathlib import Path

# Locate the project root (the directory that holds `src/`) and make it the cwd.
# As a script, `__file__` pins the location. In a notebook kernel `__file__` is
# undefined, so the working directory is used instead.
if '__file__' in globals():
    project_dir = Path(__file__).parent.parent
else:
    _cwd = Path.cwd()
    # Re-running this cell after the first chdir must not climb another level:
    # when the cwd already contains `src/`, it already is the project root.
    project_dir = _cwd if (_cwd / "src").is_dir() else _cwd.parent
os.chdir(project_dir)

# Add src directory to Python path for imports (only once, even on re-run)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Append an 'asset_class' column to df, based on the property type that carries
    the highest total appraised value for each account.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'acctnbr', 'proptypdesc' and 'aprsvalueamt'. The frame is
        modified in place (the new column is added to the object passed in).
    mapping_dict : dict
        Either {asset_class: [proptypdesc, ...]} (the PROPERTY_TYPE_GROUPS
        layout) or a flat {proptypdesc: asset_class} lookup.

    Returns
    -------
    pandas.DataFrame
        The same df with 'asset_class' set to the mapped class, 'Other' when the
        dominant property type is not in the mapping, or 'No Data' when the
        account has no property type at all.
    """
    # Normalise to a flat proptypdesc -> asset_class lookup. The grouped layout
    # is keyed by class name, so looking a property type up in it directly would
    # miss almost everything (or hand back a whole list).
    type_to_class = {}
    for key, value in mapping_dict.items():
        if isinstance(value, (list, tuple, set, frozenset)):
            for prop_type in value:
                type_to_class.setdefault(prop_type, key)
        else:
            type_to_class.setdefault(key, value)

    # Total appraised value per (account, property type). groupby drops rows
    # whose account number or property type is missing.
    totals = df.groupby(['acctnbr', 'proptypdesc'])['aprsvalueamt'].sum()

    dominant_type = {}
    if not totals.empty:
        # idxmax yields (acctnbr, proptypdesc) index labels; keep the type.
        winners = totals.groupby(level='acctnbr').idxmax()
        dominant_type = {acct: label[1] for acct, label in winners.items()}

    def to_asset_class(prop_type):
        if pd.isna(prop_type):
            return 'No Data'
        return type_to_class.get(prop_type, 'Other')

    df['asset_class'] = df['acctnbr'].map(dominant_type.get).map(to_asset_class)
    return df

# Draft body of fetch_cml(): CML piece of BUILT extract.
# Hand-picked account numbers (as strings) that scope the extract for now.
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
]

# Load the full silver-layer account table into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Restrict to the hand-picked account numbers (list defined by Hasan) for now.
# NOTE(review): this filter compares against string literals but runs before the
# 'acctnbr' cast further down; confirm the column is already string-typed in the
# table, otherwise isin() would match nothing.
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
# Keep only the account-level fields needed for the extract.
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

# Cast the join key to string (presumably so it matches the other tables' keys;
# cast_columns is a project helper, so verify its exact behaviour).
accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

# Account <-> property link table: reduced to the two key columns used for joining.
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
# NOTE(review): the name `property` shadows the Python built-in of the same name
# for the rest of the kernel session.
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge: accounts -> link -> property, both as left joins so accounts without a
# linked property are kept. An account linked to several properties yields one
# row per property.
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

# Address table: every column except the load timestamp is joined on 'addrnbr'.
address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration: each key is a reporting group and each
# value is the list of 'proptypdesc' values that belong to that group.
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
# Adds the 'asset_class' column to the merged account/property frame.
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
# """
# Resi piece of BUILT extract
# """
# # TODO: Implement Chris logic 
# pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
# """
# Full built extract
# """
# cml = fetch_cml()
# # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Append an 'asset_class' column to df, based on the property type that carries
    the highest total appraised value for each account.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'acctnbr', 'proptypdesc' and 'aprsvalueamt'. The frame is
        modified in place (the new column is added to the object passed in).
    mapping_dict : dict
        Either {asset_class: [proptypdesc, ...]} (the PROPERTY_TYPE_GROUPS
        layout) or a flat {proptypdesc: asset_class} lookup.

    Returns
    -------
    pandas.DataFrame
        The same df with 'asset_class' set to the mapped class, 'Other' when the
        dominant property type is not in the mapping, or 'No Data' when the
        account has no property type at all.
    """
    # Normalise to a flat proptypdesc -> asset_class lookup; the grouped layout
    # is keyed by class name and cannot be queried by property type directly.
    type_to_class = {}
    for key, value in mapping_dict.items():
        if isinstance(value, (list, tuple, set, frozenset)):
            for prop_type in value:
                type_to_class.setdefault(prop_type, key)
        else:
            type_to_class.setdefault(key, value)

    # Total appraised value per (account, property type). Accounts with no
    # property type simply do not appear here, so nothing calls idxmax on an
    # empty group (which raises).
    totals = df.groupby(['acctnbr', 'proptypdesc'])['aprsvalueamt'].sum()

    dominant_type = {}
    if not totals.empty:
        # idxmax yields (acctnbr, proptypdesc) index labels; keep the type.
        winners = totals.groupby(level='acctnbr').idxmax()
        dominant_type = {acct: label[1] for acct, label in winners.items()}

    def to_asset_class(prop_type):
        if pd.isna(prop_type):
            return 'No Data'
        return type_to_class.get(prop_type, 'Other')

    df['asset_class'] = df['acctnbr'].map(dominant_type.get).map(to_asset_class)
    return df

# Draft body of fetch_cml(): CML piece of BUILT extract.
# Hand-picked account numbers (as strings) that scope the extract for now.
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
]

# Load the full silver-layer account table into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Restrict to the hand-picked account numbers (list defined by Hasan) for now.
# NOTE(review): the filter runs before the 'acctnbr' string cast below; confirm
# the column is already string-typed in the table.
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
# Keep only the account-level fields needed for the extract.
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

# Cast the join key to string via the project helper.
accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

# Account <-> property link table: reduced to the two key columns used for joining.
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
# NOTE(review): the name `property` shadows the Python built-in of the same name.
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge: left joins keep accounts that have no linked property; an account
# linked to several properties yields one row per property.
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

# Address table: every column except the load timestamp is joined on 'addrnbr'.
address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration: each key is a reporting group and each
# value is the list of 'proptypdesc' values that belong to that group.
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
# Adds the 'asset_class' column to the merged account/property frame.
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
# """
# Resi piece of BUILT extract
# """
# # TODO: Implement Chris logic 
# pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
# """
# Full built extract
# """
# cml = fetch_cml()
# # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

def add_asset_class(df, mapping_dict):
    """
    Append an 'asset_class' column to df, based on the property type that carries
    the highest total appraised value for each account.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'acctnbr', 'proptypdesc' and 'aprsvalueamt'. The frame is
        modified in place (the new column is added to the object passed in).
    mapping_dict : dict
        Either {asset_class: [proptypdesc, ...]} (the PROPERTY_TYPE_GROUPS
        layout) or a flat {proptypdesc: asset_class} lookup.

    Returns
    -------
    pandas.DataFrame
        The same df with 'asset_class' set to the mapped class, 'Other' when the
        dominant property type is not in the mapping, or 'No Data' when the
        account has no property type at all.
    """
    # Normalise to a flat proptypdesc -> asset_class lookup; the grouped layout
    # is keyed by class name and cannot be queried by property type directly.
    type_to_class = {}
    for key, value in mapping_dict.items():
        if isinstance(value, (list, tuple, set, frozenset)):
            for prop_type in value:
                type_to_class.setdefault(prop_type, key)
        else:
            type_to_class.setdefault(key, value)

    # A two-level groupby replaces groupby.apply, so the result does not depend
    # on how apply treats the grouping column, and accounts without any property
    # type never reach idxmax.
    totals = df.groupby(['acctnbr', 'proptypdesc'])['aprsvalueamt'].sum()

    dominant_type = {}
    if not totals.empty:
        # idxmax yields (acctnbr, proptypdesc) index labels; keep the type.
        winners = totals.groupby(level='acctnbr').idxmax()
        dominant_type = {acct: label[1] for acct, label in winners.items()}

    def to_asset_class(prop_type):
        if pd.isna(prop_type):
            return 'No Data'
        return type_to_class.get(prop_type, 'Other')

    df['asset_class'] = df['acctnbr'].map(dominant_type.get).map(to_asset_class)
    return df

# Draft body of fetch_cml(): CML piece of BUILT extract.
# Hand-picked account numbers (as strings) that scope the extract for now.
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
]

# Load the full silver-layer account table into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Restrict to the hand-picked account numbers (list defined by Hasan) for now.
# NOTE(review): the filter runs before the 'acctnbr' string cast below; confirm
# the column is already string-typed in the table.
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
# Keep only the account-level fields needed for the extract.
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe

]].copy()

# Cast the join key to string via the project helper.
accts_schema = {
    'acctnbr':'str'
}
accts = cdutils.input_cleansing.cast_columns(accts, accts_schema)

# Account <-> property link table: reduced to the two key columns used for joining.
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

acct_prop_link_schema = {
    'acctnbr':'str',
    'propnbr':'str'
}

acct_prop_link = cdutils.input_cleansing.cast_columns(acct_prop_link, acct_prop_link_schema)
acct_prop_link = acct_prop_link[[
    'acctnbr',
    'propnbr'
]].copy()

# Property
# NOTE(review): the name `property` shadows the Python built-in of the same name.
property = DeltaTable(src.config.SILVER / "property").to_pandas()
prop_schema = {
    'propnbr':'str',
    'addrnbr':'str'
}

property = cdutils.input_cleansing.cast_columns(property, prop_schema)

# Filter down to applicable columns
property = property[[
    'propnbr',
    'aprsvalueamt',
    'aprsdate',
    'proptypdesc',
    'addrnbr',
    'owneroccupiedcd',
    'owneroccupieddesc',
    'nbrofunits',
]].copy()

# Merge: left joins keep accounts that have no linked property; an account
# linked to several properties yields one row per property.
accts = accts.merge(acct_prop_link, on='acctnbr', how='left')
accts = accts.merge(property, on='propnbr', how='left')

# Address table: every column except the load timestamp is joined on 'addrnbr'.
address = DeltaTable(src.config.SILVER / "address").to_pandas()
address_schema = {
    'addrnbr':'str'
}
address = cdutils.input_cleansing.cast_columns(address, address_schema)

address = address.drop(columns='load_timestamp_utc').copy()
accts = accts.merge(address, on='addrnbr', how='left')

# Append asset class
# Property type grouping configuration: each key is a reporting group and each
# value is the list of 'proptypdesc' values that belong to that group.
PROPERTY_TYPE_GROUPS = {
    'Autobody/Gas Station': ['Autobody/Gas Station','Gas Station and Convenience St','Auto-Truck Repair','Car Wash'],
    'Retail': ['Retail - Big Box Store','Shopping Plaza','Strip Plaza','General Retail','Dealership'],
    'Hospitality': ['Hotel/Motel','Hospitality/Event Space','Assisted Living'],
    'Recreation': ['Outdoor Recreation','Indoor Recreational','Golf Course','Marina'],
    'Industrial': ['Manufacturing','Warehouse','Industrial','Seafood Processing Plant','Solar Farm'],
    'Land': ['Land - Unimproved','Land - Improved','Parking Lot'],
    'Mixed Use': ['Mixed Use (Retail/Office)','Mixed Use (Retail/Residential)','Mixed Use (Office/Residential)'],
    'Multi Family': ['Apartment Building','Multi Family'],
    'General Office': ['Office - Professional','Office- General'],
    'Medical Office': ['Office - Medical'],
    'Restaurant': ['Restaurant'],
    'Residential': ['1-4 Fam Res - Non Own Occ','1 Family Residential - Own Occ','2 Family Residential - Own Occ','Condominium'],
    'Storage': ['Self Storage'],
    'Educational': ['Educational Facilities','Day Care'],
    'Religious': ['Church'],
    'Vehicles': ['Vehicle - Business','Boat'],
    'Other': ['Commercial - Other','Real Estate - Business','Real Estate - Bus&Bus Assets','Real Estate - Personal & Bus','Real Estate - Pers&Bus Assets','All Business Assets','Bus Assets w/Accts Receivable','UCC - ABA','UCC- Equipment','Assignment of Leases/Rents','General Contractor','Outdoor Dealers','Marketable Securities','SBA Loan','Funeral Home','Savings - Partially Secured','Passbook/Savings Secured']
}
# Adds the 'asset_class' column to the merged account/property frame.
accts = add_asset_class(accts, mapping_dict=PROPERTY_TYPE_GROUPS)

# Participation data can be separate or in there
# INVR fields maybe, could just leave off for this cycle

# return accts 

# def fetch_resi():
#     """
#     Resi piece of BUILT extract
#     """
#     # TODO: Implement Chris logic 
#     pass

# # def transform(df):
# #     """
# #     Core logic/transformations/filtering for BUILT extract

# #     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
# #     """

# #     df = df[[
# #         'effdate', # Effective date of data
# #         'acctnbr', # Loan Number
# #         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
# #         'loanlimityn', # LOC Type (Y/N)
# #         'notebal', # Draw Funded to Date
# #         'Net Balance', # BCSB Net Balance
# #         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
# #         'origdate', # Date loan hit core system (Close Date)
# #         'datemat', # Maturity Date (full loan)
# #         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
# #         # Create calculated field for term (Months) between inactivedate and origdate
# #         'noteintrate', # Interest Rate (Current)
# #         'mjaccttypcd', # Major code
# #         'currmiaccttypcd', # Minor code (1:1 match with product)
# #         'product', # Product Type
# #         # Asset class, calculated from proptypdesc mode with appraised values
# #         # All prop date requested
# #         # Appraisal info
# #         # Owner occ
# #         # Borrower info
# #         # They want controlling person for each org I believe
# #     ]].copy()



# #     # Participation data can be separate or in there
# #     # INVR fields maybe

# #     # Make sure acctnbr field is str datatype


# def generate_built_extract():
#     """
#     Full built extract
#     """
#     cml = fetch_cml()
#     # resi = fetch_resi()




In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

# Draft body of fetch_cml(): CML piece of BUILT extract.
# Hand-picked account numbers (as strings) that scope the extract for now.
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
]

# Load the full silver-layer account table into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Restrict to the hand-picked account numbers (list defined by Hasan) for now.
# NOTE(review): 'acctnbr' is compared against string literals and is never cast
# in this cell; confirm the column is string-typed in the table.
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()
# Keep only the account-level fields needed for the extract.
accts = accts[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe
]].copy()



# Participation data can be separate or in there
# INVR fields maybe

# Make sure acctnbr field is str datatype


    # return accts 


In [None]:
accts

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

def fetch_cml(acctnbrs=None):
    """
    CML piece of BUILT extract.

    Reads the silver "account" table and keeps only the requested accounts.

    Parameters
    ----------
    acctnbrs : list-like of str, optional
        Account numbers to keep. When omitted, the hand-picked list below
        (defined by Hasan for now) is used, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        Copy of the matching account rows with every source column retained.
    """
    if acctnbrs is None:
        acctnbrs = [
            "151038843",
            "151193118",
            "151208305",
            "151167189",
            "151207620",
            "151095041",
            "151068098",
            "151068684",
            "151158766",
            "150443887",
            "150969031",
            "151173897",
        ]

    accts = DeltaTable(src.config.SILVER / "account").to_pandas()

    # .copy() so later column assignments do not touch a view of the full table
    return accts[accts['acctnbr'].isin(acctnbrs)].copy()

def fetch_resi():
    """
    Resi piece of BUILT extract.

    Placeholder only: the residential logic is not written yet, so the call
    returns None.
    """
    # TODO: Implement Chris logic
    return None

def transform(df):
    """
    Core logic/transformations/filtering for BUILT extract.

    Takes in a df (cml/resi) and produces a standardized schema for the output
    so the pieces can be unioned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column listed below; a missing column raises KeyError.

    Returns
    -------
    pandas.DataFrame
        A copy of df restricted to the extract columns, in the order listed.
        The input frame is left untouched.
    """
    built_columns = [
        'effdate', # Effective date of data
        'acctnbr', # Loan Number
        'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
        'loanlimityn', # LOC Type (Y/N)
        'notebal', # Draw Funded to Date
        'Net Balance', # BCSB Net Balance
        # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
        'origdate', # Date loan hit core system (Close Date)
        'datemat', # Maturity Date (full loan)
        'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
        # Create calculated field for term (Months) between inactivedate and origdate
        'noteintrate', # Interest Rate (Current)
        'mjaccttypcd', # Major code
        'currmiaccttypcd', # Minor code (1:1 match with product)
        'product', # Product Type
        # Asset class, calculated from proptypdesc mode with appraised values
        # All prop date requested
        # Appraisal info
        # Owner occ
        # Borrower info
        # They want controlling person for each org I believe
    ]

    # Participation data can be separate or in there
    # INVR fields maybe

    # Make sure acctnbr field is str datatype

    # Return the selected frame; previously the function fell through and
    # returned None, discarding the result.
    return df[built_columns].copy()

# def generate_built_extract():
"""
Full built extract
"""
# Pull the CML accounts; the residential piece is not wired in yet.
cml = fetch_cml()
# resi = fetch_resi()

# The column selection below is done inline on a copy instead of calling
# transform() (the call is commented out).
# cml = transform(cml)
df = cml.copy()

df = df[[
    'effdate', # Effective date of data
    'acctnbr', # Loan Number
    'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
    'loanlimityn', # LOC Type (Y/N)
    'notebal', # Draw Funded to Date
    'Net Balance', # BCSB Net Balance
    # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
    'origdate', # Date loan hit core system (Close Date)
    'datemat', # Maturity Date (full loan)
    'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
    # Create calculated field for term (Months) between inactivedate and origdate
    'noteintrate', # Interest Rate (Current)
    'mjaccttypcd', # Major code
    'currmiaccttypcd', # Minor code (1:1 match with product)
    'product', # Product Type
    # Asset class, calculated from proptypdesc mode with appraised values
    # All prop date requested
    # Appraisal info
    # Owner occ
    # Borrower info
    # They want controlling person for each org I believe
]].copy()

# Participation data can be separate or in there
# INVR fields maybe

# Make sure acctnbr field is str datatype


In [None]:
# Exploration: load the raw account/property link table for inspection.
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

In [None]:
acct_prop_link

In [None]:
# Exploration: load the raw property table for inspection.
# NOTE(review): the name `property` shadows the Python built-in of the same name.
property = DeltaTable(src.config.SILVER / "property").to_pandas()

In [None]:
property

In [None]:
import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime
from typing import Optional

# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating

def fetch_invr():
    """
    Main data query.

    Builds two SELECT statements (OSIBANK.WH_INVR and OSIBANK.ACCTGRPINVR),
    hands them to cdutils.database.connect.retrieve_data under the keys
    'wh_invr' and 'acctgrpinvr' (both on engine 1), and returns whatever that
    helper returns.
    """
    wh_invr_sql = """
    SELECT
        a.ACCTNBR,
        a.ACCTGRPNBR,
        a.INVRSTATCD,
        a.PCTOWNED,
        a.ORIGINVRRATE,
        a.CURRINVRRATE,
        a.DATELASTMAINT
    FROM
        OSIBANK.WH_INVR a
    """

    acctgrpinvr_sql = """
    SELECT
        a.ACCTGRPNBR,
        a.INVRORGNBR
    FROM
        OSIBANK.ACCTGRPINVR a
    """

    # One request per statement, in the same order as before.
    statements = (('wh_invr', wh_invr_sql), ('acctgrpinvr', acctgrpinvr_sql))
    queries = [
        {'key': key, 'sql': text(sql), 'engine': 1}
        for key, sql in statements
    ]

    return cdutils.database.connect.retrieve_data(queries)


In [None]:
import cdutils.deduplication

In [None]:
# Get investor data
# fetch_invr() returns a mapping keyed by query name; take private copies.
invr = fetch_invr()
wh_invr = invr['wh_invr'].copy()


acctgrpinvr = invr['acctgrpinvr'].copy()

# Organisation names, used to label each investor.
wh_org = DeltaTable(src.config.BRONZE / "wh_org").to_pandas()
wh_org = wh_org[[
    'orgnbr',
    'orgname'
]].copy()
# NOTE(review): presumably dedupe() leaves one row per 'orgnbr'; it is a project
# helper, so verify which row it keeps.
dedupe_list = [
    {'df':wh_org, 'field':'orgnbr'}
]
wh_org = cdutils.deduplication.dedupe(dedupe_list).copy()
# Cast every join key to str so the merges compare like with like.
# NOTE(review): the SQL selects upper-case column names but lower-case names are
# used here; presumably retrieve_data lower-cases them. Verify.
wh_org['orgnbr'] = wh_org['orgnbr'].astype(str)
wh_invr['acctgrpnbr'] = wh_invr['acctgrpnbr'].astype(str)
acctgrpinvr['acctgrpnbr'] = acctgrpinvr['acctgrpnbr'].astype(str)
acctgrpinvr['invrorgnbr'] = acctgrpinvr['invrorgnbr'].astype(str)

# investor rows -> investor org number (left join) -> org name. The second merge
# uses the pandas default (inner), so rows without a wh_org match are dropped.
merged_investor = wh_invr.merge(acctgrpinvr, on='acctgrpnbr', how='left').merge(wh_org, left_on='invrorgnbr', right_on='orgnbr')
# Largest ownership share first; presumably so the dedupe below keeps the
# highest-ownership investor per account (verify dedupe keeps the first row).
merged_investor = merged_investor.sort_values(by='pctowned', ascending=False).copy()
dedupe_list = [
    {'df':merged_investor, 'field':'acctnbr'}
]
merged_investor = cdutils.deduplication.dedupe(dedupe_list).copy()
# Drop the join keys and the sort column now that they have served their purpose.
merged_investor = merged_investor.drop(columns=['orgnbr','invrorgnbr','pctowned','acctgrpnbr']).copy()
merged_investor['acctnbr'] = merged_investor['acctnbr'].astype(str)
# Guard: exactly one investor row per account before joining onto the main frame.
assert merged_investor['acctnbr'].is_unique, "Duplicates exist. Pre-merge of investor data to full df"


In [None]:
merged_investor

In [None]:
wh_invr

In [None]:
acctgrpinvr

In [None]:

def fetch_resi():
    """
    Resi piece of BUILT extract.

    Placeholder only: the residential logic is not written yet, so the call
    returns None.
    """
    # TODO: Implement Chris logic
    return None

# def transform(df):
#     """
#     Core logic/transformations/filtering for BUILT extract
    
#     Takes in a df (cml/resi) and needs to produce a standardized schema for the output so we can union
#     """

#     df = df[[
#         'effdate', # Effective date of data
#         'acctnbr', # Loan Number
#         'creditlimitamt', # Loan Amount - this will go to 0 if it switches to Perm
#         'loanlimityn', # LOC Type (Y/N)
#         'notebal', # Draw Funded to Date
#         'Net Balance', # BCSB Net Balance
#         # 'contractdate', # Date loan closed. Opted to use orig date below, but check with Hasan/Dawn
#         'origdate', # Date loan hit core system (Close Date)
#         'datemat', # Maturity Date (full loan)
#         'inactivedate', # Inactive Date (LOC type product expires) - For BUILT purposes this would be Maturity Date I believe
#         # Create calculated field for term (Months) between inactivedate and origdate
#         'noteintrate', # Interest Rate (Current)
#         'mjaccttypcd', # Major code
#         'currmiaccttypcd', # Minor code (1:1 match with product)
#         'product', # Product Type
#         # Asset class, calculated from proptypdesc mode with appraised values
#         # All prop date requested
#         # Appraisal info
#         # Owner occ
#         # Borrower info
#         # They want controlling person for each org I believe
#     ]].copy()



#     # Participation data can be separate or in there
#     # INVR fields maybe

#     # Make sure acctnbr field is str datatype


def generate_built_extract():
    """
    Full built extract.

    Only the CML piece is wired in so far; the residential piece is still
    commented out.

    Returns
    -------
    Whatever fetch_cml() returns (the CML extract). Previously the result was
    computed and then discarded, so callers always received None.
    """
    cml = fetch_cml()
    # resi = fetch_resi()
    return cml




In [None]:
df

In [None]:
# Exploration: load the raw property table for inspection.
prop = DeltaTable(src.config.SILVER / "property").to_pandas()

In [None]:
prop

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd

# Draft body of generate_cml(): CML piece of BUILT extract.
# Hand-picked account numbers (as strings) that scope the extract for now.
acctnbrs = [
    "151038843",
    "151193118",
    "151208305",
    "151167189",
    "151207620",
    "151095041",
    "151068098",
    "151068684",
    "151158766",
    "150443887",
    "150969031",
    "151173897",
]

# Load the full silver-layer account table into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

# Keep only the hand-picked accounts; every column is retained.
accts = accts[accts['acctnbr'].isin(acctnbrs)].copy()




In [None]:
accts

In [None]:
accts.info()

In [None]:
def generate_resi():
    """
    Resi piece of BUILT extract.

    Placeholder only: nothing is implemented yet, so the call returns None.
    """
    return None

def generate_built_extract():
    """
    Full built extract

    Calls the CML and Resi generators; the results are only bound to locals and
    nothing is returned yet.

    NOTE(review): generate_cml is not defined in the visible notebook (only a
    commented-out `# def generate_cml():` line precedes the inline CML cell), so
    calling this would raise NameError unless it is defined elsewhere.
    """
    cml = generate_cml()
    resi = generate_resi()

