In [None]:
## Memorializing this as the easy way to look up accounts


In [None]:
"""
Main Entry Point
"""
from pathlib import Path
from typing import List
import argparse
from datetime import datetime

import pandas as pd # type: ignore

import src.fetch_data # type: ignore
import src.core_transform # type: ignore
import cdutils.pkey_sqlite # type: ignore
import cdutils.hhnbr # type: ignore


# Fetch the inputs, then run them through the core transformation pipeline.
data = src.fetch_data.fetch_data()
raw_data = src.core_transform.main_pipeline(data)

# Append the keys one at a time, in the same order as before:
# pkey first, then the ownership key, then the address key.
for _append_key in (
    cdutils.pkey_sqlite.add_pkey,
    cdutils.pkey_sqlite.add_ownership_key,
    cdutils.pkey_sqlite.add_address_key,
):
    raw_data = _append_key(raw_data)

In [None]:
# Pass raw_data and a copy of data['househldacct'] through
# cdutils.hhnbr.add_hh_nbr (presumably appends a household number — confirm).
# NOTE(review): raw_data is rebuilt from itself here, so re-running this cell
# feeds already-processed output back into add_hh_nbr — confirm that is safe.
househldacct = data['househldacct'].copy()
raw_data = cdutils.hhnbr.add_hh_nbr(raw_data, househldacct)

In [None]:
# Display raw_data for a visual check.
# NOTE(review): mask_pii has not been applied at this point — clear this
# cell's output before sharing the notebook.
raw_data

In [None]:
import cdutils.loans.calculations

In [None]:
# Run cdutils.loans.calculations.categorize_loans on raw_data. The result is
# expected to carry 'acctnbr' and 'Category' columns, which the next cell selects.
loan_category_df = cdutils.loans.calculations.categorize_loans(raw_data)

In [None]:
# Keep only the join key and the category label; .copy() detaches the result
# from the frame returned by categorize_loans.
loan_category_df = loan_category_df[['acctnbr','Category']].copy()

In [None]:
# Left-join the loan category onto raw_data by account number, keeping every
# raw_data row even when no category is found.
# NOTE(review): a left merge repeats left rows when the right frame has
# duplicate acctnbr values — confirm loan_category_df is unique per acctnbr.
df = pd.merge(
    left=raw_data,
    right=loan_category_df,
    how='left',
    on='acctnbr',
)

In [None]:
# Display the merged frame (raw_data plus Category). Values are still unmasked here.
df

In [None]:
import cdutils.inactive_date

# Pass df through cdutils.inactive_date.append_inactive_date and keep the result.
# NOTE(review): df is reassigned from itself, so re-running this cell applies
# the helper to its own earlier output — confirm that is harmless.
df = cdutils.inactive_date.append_inactive_date(df)

In [None]:
# Display df after the inactive-date step. Values are still unmasked here.
df

In [None]:
# Take a copy of the 'wh_pers' entry of the fetched data.
# NOTE(review): `pers` is not referenced again in the cells shown here.
pers = data['wh_pers'].copy()

In [None]:
# Print column names, dtypes and non-null counts for df.
df.info()

In [None]:
import src.additional_fields

# Second fetch, through src.additional_fields; the next cell reads its
# 'wh_acctcommon' entry.
additional_fields = src.additional_fields.fetch_data()

In [None]:
# Work on a copy of the 'wh_acctcommon' entry so the fetched object is left untouched.
additional_fields_to_append = additional_fields['wh_acctcommon'].copy()

In [None]:
import cdutils.input_cleansing

# Schema handed to enforce_schema: acctnbr -> 'str'
# (presumably casts the column to string so it matches the merge key — confirm).
additional_fields_to_append_schema = {'acctnbr': 'str'}

additional_fields_to_append = cdutils.input_cleansing.enforce_schema(
    additional_fields_to_append,
    additional_fields_to_append_schema,
)

In [None]:
# Left-join the additional account fields onto df by account number.
# NOTE(review): df is reassigned from itself, so re-running this cell merges
# the same columns a second time (pandas then suffixes the overlapping names).
df = pd.merge(
    left=df,
    right=additional_fields_to_append,
    how='left',
    on='acctnbr',
)

In [None]:
# Display df after the additional fields were merged in. Values are still unmasked here.
df

In [None]:
# OUTPUT_PATH = Path('acct_table.csv')
# df.to_csv(OUTPUT_PATH, index=False)

In [None]:
import hashlib

In [None]:
def mask_pii(data, columns_to_mask, length=10, salt=""):
    """
    Create a masking layer

    Return a copy of a dataframe in which every value of the specified columns
    is replaced by a truncated SHA-256 hex digest of its string form.

    Parameters:
    - data: raw data (pandas DataFrame); it is not modified
    - columns_to_mask: list of columns
    - length: length of hash (10+ is recommended based on size of the data).
      Must be at least 1; None keeps the full 64-character digest.
    - salt: optional string prepended to every value before hashing. The
      default "" produces the same tokens as an unsalted hash. Without a salt,
      anyone who can enumerate candidate values (for example account numbers)
      can recompute the digests and undo the masking.

    Returns:
    - a new dataframe with the requested columns masked. Missing values
      (None / NaN / NaT) stay missing instead of being hashed, so an empty
      cell never turns into a token that looks like a real identifier.

    Raises:
    - ValueError: if length is less than 1, or if a column in columns_to_mask
      is not present in data
    """
    if length is not None and length < 1:
        raise ValueError(f"length must be at least 1, got {length}")

    # Validate every requested column before copying or hashing anything.
    for col in columns_to_mask:
        if col not in data.columns:
            raise ValueError(f"Column {col} not found in dataframe passed in")

    def _digest(value):
        return hashlib.sha256((salt + value).encode('utf-8')).hexdigest()[:length]

    df_hashed = data.copy()

    for col in columns_to_mask:
        original = df_hashed[col]
        # Record which cells hold real values before astype(str) can turn a
        # missing value into the literal text 'nan' / 'None'.
        has_value = original.notna().to_numpy()
        # na_action='ignore' covers pandas versions whose astype(str) keeps
        # missing values missing; .where() blanks them out on the others.
        hashed = original.astype(str).map(_digest, na_action='ignore')
        df_hashed[col] = hashed.where(has_value)

    return df_hashed

In [None]:
# Columns whose values are replaced by hashes before the table is exported.
columns_to_mask = [
    'acctnbr',
    'ownersortname',
    'loanofficer',
    'acctofficer',
    'taxrptfororgnbr',
    'taxrptforpersnbr',
]

# mask_pii works on a copy, so df itself keeps the original values.
masked_df = mask_pii(data=df, columns_to_mask=columns_to_mask)

In [None]:
# Display the masked frame to check that the listed columns now hold hashes.
masked_df

In [None]:
# First three masked rows, flipped so each original column becomes a row;
# reset_index then turns the column names into a regular column.
transposed_df = masked_df.head(n=3).transpose().reset_index()

In [None]:
# Display the transposed three-row preview of the masked data.
transposed_df

In [None]:
# Write the masked table to a CSV (path is relative to the current working
# directory), leaving out the dataframe index.
OUTPUT_PATH = Path('masked_acct_table.csv')
masked_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)