In [None]:
import os
import sys
from pathlib import Path

# Navigate to the project root (equivalent to `cd ..` from the notebook folder).
# os.chdir() below changes what Path.cwd() returns, so blindly taking
# Path.cwd().parent made every re-run of this cell climb one directory higher.
# If the current directory already contains `src/`, it is treated as the
# project root and we stay put, which makes the cell safe to re-run.
if '__file__' in globals():
    # Executed as a script: the root is two levels above this file.
    project_dir = Path(__file__).parent.parent
elif (Path.cwd() / "src").is_dir():
    # Already at the project root (e.g. this cell was run before).
    project_dir = Path.cwd()
else:
    # First run inside a notebook: the root is one level above the cwd.
    project_dir = Path.cwd().parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
import src.config
from deltalake import DeltaTable
import pandas as pd
import cdutils.input_cleansing # type: ignore

In [None]:
"""
Data-fetching module. The aim is to import all necessary fields up front; if needed, you can define another function here and call it.

Usage:
    import src.cdutils.database

You need to set the effective date you want to see; it is embedded in the SQL query.
"""

import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime
from typing import Optional

# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating

def fetch_invr():
    """
    Fetch investor rows and the account-group-to-investor links from OSIBANK.

    Two queries are submitted in a single ``retrieve_data`` call:

    * ``wh_invr``     - seven named columns of OSIBANK.WH_INVR.
    * ``acctgrpinvr`` - ACCTGRPNBR and INVRORGNBR from OSIBANK.ACCTGRPINVR.

    Returns:
        Whatever ``cdutils.database.connect.retrieve_data`` returns for these
        queries. Callers in this notebook index the result by the ``key``
        values above (presumably a dict of DataFrames - confirm in cdutils).
    """
    investor_sql = text("""
    SELECT
        a.ACCTNBR,
        a.ACCTGRPNBR,
        a.INVRSTATCD,
        a.PCTOWNED,
        a.ORIGINVRRATE,
        a.CURRINVRRATE,
        a.DATELASTMAINT
    FROM
        OSIBANK.WH_INVR a
    """)

    group_link_sql = text("""
    SELECT
        a.ACCTGRPNBR,
        a.INVRORGNBR
    FROM
        OSIBANK.ACCTGRPINVR a
    """)

    # One request dict per statement; 'engine' selects the connection inside
    # retrieve_data (what engine 1 points at is not visible from this notebook).
    statements = (
        ('wh_invr', investor_sql),
        ('acctgrpinvr', group_link_sql),
    )
    return cdutils.database.connect.retrieve_data(
        [{'key': name, 'sql': stmt, 'engine': 1} for name, stmt in statements]
    )



# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating
def fetch_inactive_date_data():
    """
    Fetch every column of OSIBANK.ACCTLOANLIMITHIST.

    Returns:
        The result of ``cdutils.database.connect.retrieve_data`` for a single
        query registered under the key ``'acctloanlimithist'``.
    """
    limit_history_sql = text("""
    SELECT
        * 
    FROM
        OSIBANK.ACCTLOANLIMITHIST a
    """)

    # A second query against OSIBANK.VIEWORGTAXID was drafted here but is
    # currently disabled, so only the limit history is requested.
    return cdutils.database.connect.retrieve_data([
        {'key': 'acctloanlimithist', 'sql': limit_history_sql, 'engine': 1},
    ])


def fetch_orgpersrole():
    """
    Fetch every column of OSIBANK.WH_ORGPERSROLE (controlling-person data).

    Returns:
        The result of ``cdutils.database.connect.retrieve_data`` for a single
        query registered under the key ``'wh_orgpersrole'``.
    """
    org_person_role_sql = text("""
    SELECT
        *
    FROM
        OSIBANK.WH_ORGPERSROLE a
    """)

    request = {'key': 'wh_orgpersrole', 'sql': org_person_role_sql, 'engine': 1}
    return cdutils.database.connect.retrieve_data([request])

def fetch_holdbacks():
    """
    Fetch the most recent balamt for each holdback subaccount.

    The query joins OSIBANK.ACCTSUBACCT (restricted to BALCATCD = 'HOLD') to
    OSIBANK.ACCTBALHIST on acctnbr and subacctnbr, ranks the history rows per
    (acctnbr, subacctnbr) by effdate descending, and keeps only the top-ranked
    row of each pair.

    Returns:
        The result of ``cdutils.database.connect.retrieve_data`` for a single
        query registered under the key ``'holdbacks'``.
    """
    latest_holdback_sql = text("""
    SELECT
        acctnbr,
        subacctnbr,
        balcatcd,
        balamt
    FROM (
        SELECT
            a.acctnbr,
            a.subacctnbr,
            a.balcatcd,
            h.balamt,
            h.effdate,
            ROW_NUMBER() OVER (PARTITION BY a.acctnbr, a.subacctnbr ORDER BY h.effdate DESC) AS rn
        FROM
            OSIBANK.ACCTSUBACCT a
        INNER JOIN
            OSIBANK.ACCTBALHIST h ON a.acctnbr = h.acctnbr AND a.subacctnbr = h.subacctnbr
        WHERE
            a.BALCATCD = 'HOLD'
    )
    WHERE rn = 1
    """)

    request = {'key': 'holdbacks', 'sql': latest_holdback_sql, 'engine': 1}
    return cdutils.database.connect.retrieve_data([request])


In [None]:
# Run the ACCTLOANLIMITHIST query; the result is indexed by query key in the next cell.
data = fetch_inactive_date_data()


In [None]:
# Take a copy so the fetched result held in `data` is left untouched.
acctloanlimithist = data['acctloanlimithist'].copy()

In [None]:
# Display the limit-history result for inspection.
acctloanlimithist

In [None]:
# Load the "account" Delta table located under src.config.SILVER into pandas.
accts = DeltaTable(src.config.SILVER / "account").to_pandas()

In [None]:
# Display the account table for inspection.
accts

In [None]:
# Load the "wh_acctuserfields" Delta table located under src.config.BRONZE into pandas.
acctuserfields = DeltaTable(src.config.BRONZE / "wh_acctuserfields").to_pandas()

In [None]:
# Display the account user-field table for inspection.
acctuserfields

In [None]:
# Keep only the rows whose user-field code is 'FPTS', as an independent copy.
# NOTE(review): FPTS is compared against totalpctsold later on, so it is
# presumably a participation-sold flag - confirm with the data owner.
is_fpts_row = acctuserfields['acctuserfieldcd'] == 'FPTS'
fpts = acctuserfields.loc[is_fpts_row].copy()

In [None]:
# Display the FPTS subset for inspection.
fpts

In [None]:
# Each account may carry at most one FPTS row; a repeated acctnbr would
# duplicate accounts when fpts is left-merged onto accts further down.
assert fpts['acctnbr'].is_unique, "Dupes"

In [None]:
import cdutils.input_cleansing

# Reduce the FPTS rows to the join key and the flag value.
fpts = fpts[[
    'acctnbr',
    'acctuserfieldvalue'
]].copy()

# Cast the join key to str with the shared cleansing helper.
fpts_schema = {
    'acctnbr':'str'
}
fpts = cdutils.input_cleansing.cast_columns(fpts, fpts_schema)

# Left-join the flag onto every account.
# Any 'acctuserfieldvalue' column left behind by a previous run of this cell is
# dropped first: merging it again would yield 'acctuserfieldvalue_x' / '_y'
# and the mismatch check that reads accts['acctuserfieldvalue'] would then
# fail with a KeyError. On a fresh kernel the drop is a no-op.
accts = (
    accts
    .drop(columns=['acctuserfieldvalue'], errors='ignore')
    .merge(fpts, how='left', on='acctnbr')
)

In [None]:
# Display the account table after the FPTS flag has been merged in.
accts

In [None]:
# An account counts as "sold" when totalpctsold is above zero and as "flagged"
# when its FPTS value is 'Y'. The two are expected to agree, so a mismatch is
# any row where exactly one of them holds.
#
# Missing values are treated as "not sold" / "not flagged". The earlier
# `totalpctsold <= 0` test silently skipped accounts flagged 'Y' whose
# totalpctsold is missing (NaN <= 0 is False); those rows are now reported.
# fillna/astype keep the masks plain booleans even with nullable dtypes.
is_sold = (accts['totalpctsold'] > 0).fillna(False).astype(bool)
is_flagged = (accts['acctuserfieldvalue'] == 'Y').fillna(False).astype(bool)
mismatch_fpts = is_sold != is_flagged

mismatched_records = accts[mismatch_fpts]

print(f"Total mismatch: {len(mismatched_records)}")

In [None]:
# Display the accounts whose FPTS flag and totalpctsold disagree.
mismatched_records

In [None]:
import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime
from typing import Optional

# Define fetch data here using cdutils.database.connect
# There are often fetch_data.py files already in project if migrating

def fetch_invr():
    """
    Fetch investor rows and the account-group-to-investor links from OSIBANK.

    NOTE: this re-defines ``fetch_invr`` from an earlier cell of the notebook;
    once this cell has run, this definition is the one that gets called.

    Two queries are submitted in a single ``retrieve_data`` call:

    * ``wh_invr``     - seven named columns of OSIBANK.WH_INVR.
    * ``acctgrpinvr`` - ACCTGRPNBR and INVRORGNBR from OSIBANK.ACCTGRPINVR.

    Returns:
        Whatever ``cdutils.database.connect.retrieve_data`` returns for these
        queries. Callers in this notebook index the result by the ``key``
        values above (presumably a dict of DataFrames - confirm in cdutils).
    """

    # Plain string literals: the statements contain no placeholders, so the
    # former f-string prefix did nothing and only invited accidental
    # interpolation into SQL later on.
    wh_invr = text("""
    SELECT
        a.ACCTNBR,
        a.ACCTGRPNBR,
        a.INVRSTATCD,
        a.PCTOWNED,
        a.ORIGINVRRATE,
        a.CURRINVRRATE,
        a.DATELASTMAINT
    FROM
        OSIBANK.WH_INVR a
    """)

    acctgrpinvr = text("""
    SELECT
        a.ACCTGRPNBR,
        a.INVRORGNBR
    FROM
        OSIBANK.ACCTGRPINVR a
    """)

    # 'engine' selects the connection inside retrieve_data (what engine 1
    # points at is not visible from this notebook).
    queries = [
        {'key':'wh_invr', 'sql':wh_invr, 'engine':1},
        {'key':'acctgrpinvr', 'sql':acctgrpinvr, 'engine':1},
    ]

    data = cdutils.database.connect.retrieve_data(queries)
    return data



In [None]:
# Fetch both investor queries once, then take an independent copy of each
# result so later column edits do not touch what is held in `invr`.
invr = fetch_invr()
wh_invr, acctgrpinvr = (invr[key].copy() for key in ('wh_invr', 'acctgrpinvr'))

In [None]:

# Load the customer dimension and keep only the id -> name lookup columns.
customer_dim_path = src.config.SILVER / "base_customer_dim"
base_customer_dim = (
    DeltaTable(customer_dim_path)
    .to_pandas()
    .loc[:, ['customer_id', 'customer_name']]
    .copy()
)

In [None]:

import cdutils.customer_dim

# Both frames are joined on 'acctgrpnbr' later, so give the key the same
# (string) dtype on each side.
for investor_frame in (wh_invr, acctgrpinvr):
    investor_frame['acctgrpnbr'] = investor_frame['acctgrpnbr'].astype(str)

# Run the cdutils orgify helper over the investor organisation number.
# NOTE(review): presumably this is what adds the 'customer_id' column that the
# later merge with base_customer_dim joins on - confirm in cdutils.customer_dim.
acctgrpinvr = cdutils.customer_dim.orgify(acctgrpinvr, 'invrorgnbr')



In [None]:
# Display the account-group-to-investor links after orgify.
acctgrpinvr

In [None]:
# acctgrpinvr is the right-hand side of the left merge on 'acctgrpnbr' that
# follows, so each group number may appear only once; otherwise investor rows
# would be duplicated by the merge.
assert acctgrpinvr['acctgrpnbr'].is_unique, "Dupes"
# The customer_id uniqueness check is currently disabled.
# assert acctgrpinvr['customer_id'].is_unique, "Dupes"

In [None]:
# Attach the investor organisation to each investor row (via acctgrpnbr), then
# the organisation's display name (via customer_id).
# Both joins are lookups: every left-hand row should match at most one
# right-hand row. validate='many_to_one' makes pandas raise MergeError if a
# lookup key repeats on the right, instead of silently duplicating investor
# rows (and therefore their pctowned values).
merged_investor = (
    wh_invr
    .merge(acctgrpinvr, on='acctgrpnbr', how='left', validate='many_to_one')
    .merge(base_customer_dim, on='customer_id', how='left', validate='many_to_one')
)


In [None]:
# Display the merged investor frame for inspection.
merged_investor

In [None]:
# Keep only the investor rows whose status code is 'SOLD'.
is_sold_row = merged_investor['invrstatcd'] == 'SOLD'
merged_investor = merged_investor.loc[is_sold_row].copy()

In [None]:
# Drop the last-maintenance date column.
# errors='ignore' keeps this cell re-runnable: without it a second run raises
# KeyError because the column is already gone. drop() already returns a new
# DataFrame, so no extra .copy() is needed.
merged_investor = merged_investor.drop(columns=['datelastmaint'], errors='ignore')

In [None]:
# Give the customer name its report-facing column header.
participant_header = {'customer_name': 'Participant Name'}
merged_investor = merged_investor.rename(columns=participant_header).copy()

In [None]:
# Print column names, dtypes and non-null counts to check the frame's structure.
merged_investor.info()

In [None]:
# Cast the account number to str with the shared cleansing helper.
merged_investor_schema = dict(acctnbr='str')
merged_investor = cdutils.input_cleansing.cast_columns(
    merged_investor,
    merged_investor_schema,
)

In [None]:
# Show summary statistics for the frame's columns.
merged_investor.describe()

In [None]:
# Display the investor frame after the acctnbr cast.
merged_investor

In [None]:
# Narrow the frame to the minimal set of columns readers need to see.
report_columns = ['acctnbr', 'pctowned', 'Participant Name']
merged_investor = merged_investor.loc[:, report_columns].copy()

In [None]:
# Display the trimmed three-column investor frame.
merged_investor

In [None]:
# Convert pctowned to a numeric dtype so it can be summed per account later.
merged_investor['pctowned'] = pd.to_numeric(merged_investor['pctowned'])

In [None]:
# Display the frame after the numeric conversion of pctowned.
merged_investor

In [None]:
# At this point, you could group by acctnbr and sum pctowned. You could also derive a count of participants.