In [4]:
import pandas as pd # type: ignore
from pathlib import Path
import src.transformations.joining
import src.cdutils.database
import src.cdutils.caching
import src.transformations.calculations

# Fetch data from database
# fetch_data() is called with no arguments; its result is indexed by table name below.
# NOTE(review): whether this call queries the database every run or reads a cache
# is not visible from this cell — confirm in src.cdutils.database.
data  = src.cdutils.database.fetch_data()

# Unpack data
# Each table is bound to its own notebook-level name through .copy(), so the
# transformation steps work on objects separate from the ones held in `data`
# (presumably pandas DataFrames — TODO confirm against fetch_data).
wh_acctcommon = data['wh_acctcommon'].copy()
wh_acctloan = data['wh_acctloan'].copy()
wh_loans = data['wh_loans'].copy()
acctpropins = data['acctpropins'].copy()
wh_prop = data['wh_prop'].copy()
wh_prop2 = data['wh_prop2'].copy()
wh_inspolicy = data['wh_inspolicy'].copy()
persaddruse = data['persaddruse'].copy()
orgaddruse = data['orgaddruse'].copy()
wh_addr = data['wh_addr'].copy()

# Data Cleaning & Transformations
# The steps are order-dependent: `main_data` and `merged_data` are each rebound
# several times, every call consuming the result of the previous one.
# NOTE(review): the helpers live in src.transformations and are not shown here;
# the per-step notes below follow the helper names only — confirm against the modules.

# Build the loan-level frame from the three account/loan tables.
main_data = src.transformations.joining.join_loan_tables(wh_acctcommon, wh_acctloan, wh_loans)
# Build the property frame from the two property tables (independent of main_data).
prop_data = src.transformations.joining.join_prop_tables(wh_prop, wh_prop2)
# Add owner address information using the org/person address-use tables and wh_addr.
main_data = src.transformations.joining.appending_owner_address(main_data, orgaddruse, persaddruse, wh_addr)
# Add the total-exposure field, then run the loan-data cleaning step.
main_data = src.transformations.calculations.append_total_exposure_field(main_data)
main_data = src.transformations.calculations.cleaning_loan_data(main_data)
# Combine loan data with property data (helper name suggests one loan may map to several properties).
merged_data = src.transformations.joining.consolidation_with_multiple_props(main_data, prop_data)
# Combine the two insurance tables, then attach the result to the merged loan/property frame.
insurance_merged = src.transformations.joining.merging_insurance_tables(acctpropins, wh_inspolicy)
merged_data = src.transformations.joining.append_insurance_data_to_main(merged_data, insurance_merged)

In [2]:
# Print the merged frame's schema: index range, column names, non-null counts and dtypes.
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9966 entries, 0 to 9965
Data columns (total 78 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   acctnbr                 9966 non-null   int64         
 1   effdate                 9966 non-null   datetime64[ns]
 2   ownersortname           9966 non-null   object        
 3   notebal                 9966 non-null   float64       
 4   bookbalance             9966 non-null   float64       
 5   mjaccttypcd             9966 non-null   object        
 6   currmiaccttypcd         9966 non-null   object        
 7   product                 9966 non-null   object        
 8   curracctstatcd          9966 non-null   object        
 9   taxrptforpersnbr        6193 non-null   float64       
 10  taxrptfororgnbr         3773 non-null   float64       
 11  loanofficer             9966 non-null   object        
 12  acctofficer             0 non-null      object  

In [7]:
# Columns kept by default, in output order (the originally requested fields).
_RELEVANT_FIELDS = (
    'acctnbr',
    'ownersortname',
    'text1',
    'text2',
    'cityname',
    'statecd',
    'zipcd',
    'notebal',
    'creditlimitamt',
    'origdate',
    'proptypdesc',
    'propdesc',
    'propaddr1',
    'propaddr2',
    'propcity',
    'propstate',
    'propzip',
    'instypcd',
    'instypdesc',
    'coverageamt',
    'premamt',
    'escrowyn',
    'mjaccttypcd',
    'currmiaccttypcd',
    'fdiccatcd',
    'fdiccatdesc',
)


def filtering_down_to_relevant_fields(df: pd.DataFrame, columns=None) -> pd.DataFrame:
    """
    Reduce the dataset to the requested columns.

    Args:
        df (pd.DataFrame): Frame containing at least the requested columns.
        columns (str or iterable of str, optional): Column name(s) to keep, in
            output order. When omitted, the fields in _RELEVANT_FIELDS are used,
            which reproduces the original fixed selection.

    Returns:
        pd.DataFrame: Independent copy of `df` restricted to the requested
        columns, in the requested order. `df` itself is not modified.

    Raises:
        KeyError: If any requested column is absent from `df`.
    """
    if columns is None:
        wanted = list(_RELEVANT_FIELDS)
    elif isinstance(columns, str):
        # A bare string would otherwise be split into single characters by list().
        wanted = [columns]
    else:
        wanted = list(columns)

    # Check up front so the error names every absent column in one message
    # (still a KeyError, the same exception type the plain selection raises).
    missing = [name for name in wanted if name not in df.columns]
    if missing:
        raise KeyError(f"Cannot filter to relevant fields; missing columns: {missing}")

    # .copy() so later assignments on the result never write back into `df`.
    return df[wanted].copy()


In [8]:
# Keep only the requested columns. This rebinds `merged_data`, so the full joined
# frame is no longer reachable under that name afterwards; re-run the
# transformation cell to get it back.
merged_data = filtering_down_to_relevant_fields(merged_data)