In [None]:
import os
import sys
from pathlib import Path

# Locate the project root.
# - Script case: resolve __file__ first, so a relative value such as "nb.py"
#   still yields the real grandparent directory instead of ".".
# - Notebook case (no __file__): use the working directory. If it already
#   contains "src" we are at the root (e.g. this cell was re-run after the
#   chdir below) and must not climb again; otherwise step up one level
#   (equivalent to cd ..).
if '__file__' in globals():
    project_dir = Path(__file__).resolve().parent.parent
else:
    _cwd = Path.cwd()
    project_dir = _cwd if (_cwd / "src").is_dir() else _cwd.parent
os.chdir(project_dir)

# Add the src directory to the Python path for imports. The project root is
# added as well so package-style imports (`import src.config`) do not depend on
# the current working directory. src_dir is inserted last so it stays first.
src_dir = project_dir / "src"
for _entry in (str(project_dir), str(src_dir)):
    if _entry not in sys.path:
        sys.path.insert(0, _entry)

# Select the 'prod' report environment.
# NOTE(review): the previous comment said "dev testing" but the value has always
# been 'prod'; the value is left untouched here - confirm which one is intended.
os.environ['REPORT_ENV'] = 'prod'

In [None]:
import src.config
from deltalake import DeltaTable
from pathlib import Path
import pandas as pd


In [None]:
# Alternative table, left disabled:
# TABLE_PATH = src.config.BRONZE / "metadata_lookup_engine1"
# Active table: "account" under src.config.SILVER.
TABLE_PATH = src.config.SILVER / "account"


In [None]:
# Read the Delta table at TABLE_PATH into a pandas DataFrame.
account = DeltaTable(TABLE_PATH).to_pandas()

In [None]:
# Show the account DataFrame as the cell output.
account

In [None]:
# Repoint TABLE_PATH at "acctpropins" under src.config.BRONZE and load it into pandas.
TABLE_PATH = src.config.BRONZE / "acctpropins"
acctpropins = DeltaTable(TABLE_PATH).to_pandas()

In [None]:
# Show the acctpropins DataFrame as the cell output.
acctpropins

In [None]:
# Repoint TABLE_PATH at "wh_inspolicy" under src.config.BRONZE and load it into pandas.
TABLE_PATH = src.config.BRONZE / "wh_inspolicy"
wh_inspolicy = DeltaTable(TABLE_PATH).to_pandas()


In [None]:
# List the distinct values found in the instypdesc column.
wh_inspolicy['instypdesc'].unique()

In [None]:
# Show the wh_inspolicy DataFrame as the cell output.
wh_inspolicy

In [None]:
# Repoint TABLE_PATH at "wh_prop" under src.config.BRONZE, load it into pandas,
# and show the resulting frame as the cell output.
TABLE_PATH = src.config.BRONZE / "wh_prop"
prop = DeltaTable(TABLE_PATH).to_pandas()
prop

In [None]:
# Keep the concatenated key column unchanged (it is carried into the merge
# below), but note that it is ambiguous on its own: with no separator,
# (acctnbr=1, propnbr=23) and (acctnbr=12, propnbr=3) both become "123".
prop['CompositeKey'] = prop['acctnbr'].astype(str) + prop['propnbr'].astype(str)
# Check uniqueness on the real (acctnbr, propnbr) pair so that such string
# collisions are not misreported as duplicate records.
assert not prop.duplicated(subset=['acctnbr', 'propnbr']).any(), "Duplicate records"

In [None]:
# Repoint TABLE_PATH at "wh_prop2" under src.config.BRONZE, load it into pandas,
# and show the resulting frame as the cell output.
TABLE_PATH = src.config.BRONZE / "wh_prop2"
prop2 = DeltaTable(TABLE_PATH).to_pandas()
prop2

In [None]:
# Keep the concatenated key column unchanged (it is carried into the merge
# below), but note that it is ambiguous on its own: with no separator,
# (acctnbr=1, propnbr=23) and (acctnbr=12, propnbr=3) both become "123".
prop2['CompositeKey'] = prop2['acctnbr'].astype(str) + prop2['propnbr'].astype(str)
# Check uniqueness on the real (acctnbr, propnbr) pair so that such string
# collisions are not misreported as duplicate records.
assert not prop2.duplicated(subset=['acctnbr', 'propnbr']).any(), "Duplicate records"

In [None]:
# Outer-join prop with prop2 on the (acctnbr, propnbr) key pair; every other
# column shared by both frames is disambiguated with a _prop / _prop2 suffix.
merge_keys = ['acctnbr', 'propnbr']
merged_props = prop.merge(
    prop2,
    on=merge_keys,
    how='outer',
    suffixes=('_prop', '_prop2'),
)

In [None]:
# Show the outer-merged frame as the cell output.
merged_props

In [None]:
# Generic helper that coalesces pairs of suffixed columns into one column each.
def coalesce_columns(df, suffix1, suffix2):
    """
    Collapse each ``<base><suffix1>`` / ``<base><suffix2>`` column pair into a
    single ``<base>`` column and remove the two suffixed columns.

    The ``suffix2`` column takes priority: its values are kept, and the
    ``suffix1`` column only fills the rows where the ``suffix2`` value is null.
    Columns ending in ``suffix1`` that have no ``suffix2`` counterpart are left
    as they are. The input frame is not modified; a new DataFrame is returned.
    One message is printed per coalesced pair.
    """
    result = df.copy()
    # Snapshot the suffix1 columns up front because the frame changes in the loop.
    fallback_cols = list(filter(lambda name: name.endswith(suffix1), result.columns))

    for fallback_col in fallback_cols:
        base_name = fallback_col.removesuffix(suffix1)
        preferred_col = f"{base_name}{suffix2}"

        # Nothing to coalesce when the suffix2 counterpart is absent.
        if preferred_col not in result.columns:
            continue

        # Prefer the suffix2 value; fall back to the suffix1 value where it is null.
        preferred_values = result[preferred_col]
        result[base_name] = preferred_values.fillna(result[fallback_col])

        # The suffixed pair is now redundant.
        result = result.drop(columns=[fallback_col, preferred_col])
        print(f"Coalesced '{base_name}' from '{fallback_col}' and '{preferred_col}'.")

    return result

# Collapse the *_prop / *_prop2 column pairs left by the merge, then print a
# summary of the resulting frame.
coalesced_data = coalesce_columns(merged_props, '_prop', '_prop2')
print("\nData after coalescing columns:")
coalesced_data.info()

In [None]:
# Link table: only the (acctnbr, propnbr) key pairs, one row per distinct pair,
# so the account-to-property relationship is kept separately from the attributes.
account_property_link = (
    coalesced_data[['acctnbr', 'propnbr']]
    .copy()
    .drop_duplicates()
    .reset_index(drop=True)
)

print(f"Created `account_property_link` table with {len(account_property_link)} unique links.")

In [None]:
# Show the account/property link table as the cell output.
account_property_link

In [None]:
# Show merged_props again; coalesce_columns works on a copy, so this frame
# still carries the suffixed _prop / _prop2 columns.
merged_props

In [None]:
# Print the column / non-null count / dtype summary of the merged frame.
merged_props.info()

In [None]:
# Print the column / non-null count / dtype summary of prop.
prop.info()

In [None]:
# Print the column / non-null count / dtype summary of prop2.
prop2.info()

In [None]:
# Build the property table in one pass:
#   1. order rows by acctnbr, highest first;
#   2. keep the first row seen for each propnbr;
#   3. drop acctnbr, since the account link now lives in its own table;
#   4. renumber the index.
master_property = (
    coalesced_data
    .sort_values(by='acctnbr', ascending=False)
    .drop_duplicates(subset=['propnbr'], keep='first')
    .drop(columns=['acctnbr'])
    .reset_index(drop=True)
)

print(f"Created master `property` table with {len(master_property)} unique properties.")
# Sanity check: propnbr must identify exactly one row.
assert master_property['propnbr'].is_unique, "propnbr is not unique in the master property table!"
print("Assertion Passed: `propnbr` is a unique key for the property table.")


In [None]:
# Remove proptypecd from the property table. errors='ignore' keeps this cell
# re-runnable: on a second run the column is already gone and a plain drop
# would raise KeyError. drop() already returns a new frame, so no extra copy.
master_property = master_property.drop(columns=['proptypecd'], errors='ignore')