In [1]:
# %%
"""
Main Entry Point
"""
from pathlib import Path

import pandas as pd # type: ignore

import cdutils.pkey_sqlite # type: ignore
import cdutils.filtering # type: ignore
import cdutils.input_cleansing # type: ignore
import cdutils.cmo_append # type: ignore
import src.add_fields
import src.core_transform
import src.output_to_excel
from src._version import __version__
import src.output_to_excel_multiple_sheets

# def main(production_flag: bool=False):
#     if production_flag:
#         BASE_PATH = Path(r'\\00-DA1\Home\Share\Line of Business_Shared Services')
#         assert "prod" in __version__, (f"Cannot run in production mode without 'prod' in the __version__")
#     else:
#         BASE_PATH = Path('.')

   



# %%
# Read the staging extract produced by the daily deposit update, then enrich it.
# (The dev section of the documentation describes the staging file in more detail.)
INPUT_PATH = Path(r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx")

data = (
    pd.read_excel(INPUT_PATH)
    .pipe(cdutils.pkey_sqlite.add_pkey)       # portfolio key
    .pipe(src.add_fields.add_noteintrate)     # note interest rate
)


# Minor account-type codes that count as business deposits, mapped to their
# product names. The mapping's insertion order defines the order of `minors`.
business_minor_products = {
    'CK24': '1st Business Checking',
    'CK12': 'Business Checking',
    'CK25': 'Simple Business Checking',
    'CK30': 'Business Elite Money Market',
    'CK19': 'Business Money Market',
    'CK22': 'Business Premium Plus MoneyMkt',
    'CK23': 'Premium Business Checking',
    'CK40': 'Community Assoc Reserve',
    'CD67': 'Commercial Negotiated Rate',
    'CD01': '1 Month Business CD',
    'CD07': '3 Month Business CD',
    'CD17': '6 Month Business CD',
    'CD31': '1 Year Business CD',
    'CD35': '1 Year Business CD',
    'CD37': '18 Month Business CD',
    'CD38': '2 Year Business CD',
    'CD50': '3 Year Business CD',
    'CD53': '4 Year Business CD',
    'CD59': '5 Year Business CD',
    'CD76': '9 Month Business CD',
    'CD84': '15 Month Business CD',
    'CD95': 'Business <12 Month Simple CD',
    'CD96': 'Business >12 Month Simple CD',
    'CK28': 'Investment Business Checking',
    'CK33': 'Specialty Business Checking',
    'CK34': 'ICS Shadow - Business - Demand',
    'SV06': 'Business Select High Yield',
}
minors = list(business_minor_products)

# Keep only the business deposit accounts
data = cdutils.filtering.filter_to_business_deposits(data, minors)


# Append the CMO information to every account
data = cdutils.cmo_append.append_cmo(data)

# Coerce the interest-rate column to float; work on an independent copy afterwards
data_schema = dict(noteintrate=float)
data = cdutils.input_cleansing.enforce_schema(data, data_schema).copy()




# %%
# Drop BCSB internal accounts: owners whose name contains the bank's name
# (case-insensitive). Rows with a missing owner name are kept.
is_internal_owner = data['ownersortname'].str.contains('BRISTOL COUNTY SAVINGS', case=False, na=False)
data = data.loc[~is_internal_owner].copy()

In [2]:
# Display the business-deposit frame built in the previous cell.
# NOTE(review): the rendered output contains account numbers and owner names;
# consider `data.head()` and clearing outputs before sharing — confirm policy.
data

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
9,63111713,2025-07-03,CK,Simple Business Checking,410.11,410.11,CK25,MICHAEL A. HEY,SCANLAN FAMILY INVESTMENT TRUST,ACT,...,12190.130000,25965.43,0,0,0,0,135148.0,2020-02-26 23:28:03,33261,0.0000
11,63131889,2025-07-03,CK,Business Elite Money Market,14.14,14.13,CK30,MICHAEL A. HEY,VINEYARD REALTY LLC,ACT,...,100.772500,537.30,0,0,0,0,254846.0,2022-09-12 22:20:36,39661,0.0095
12,63138190,2025-07-03,CK,Business Elite Money Market,568460.98,567368.81,CK30,KEVIN M. MCCARTHY,"COHEN CLEARY, P. C.",ACT,...,229698.512500,59115.77,0,0,0,0,289055.0,2024-10-29 22:23:33,16245,0.0245
35,61018627,2025-07-03,CK,Business Money Market,8752.17,8749.70,CK19,SANDRA J. VANDETTE,CHERYL L HOLDEN IRRV TR 2014,ACT,...,8653.365833,9570.14,0,0,0,0,183657.0,2020-02-26 23:28:13,41155,0.0036
52,61022314,2025-07-03,CK,Business Money Market,26434.11,26426.29,CK19,DAVID FERREIRA,BILMAR PROPERTIES LLC,ACT,...,19956.373333,16350.22,0,0,0,0,183085.0,2023-10-31 16:16:23,3878,0.0036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62683,150852046,2025-07-03,CK,Simple Business Checking,0.64,0.64,CK25,DAMON T. ARPIN,WES PROVIDENCE LLC,ACT,...,721.755000,2533.50,68,81,63,30,262840.0,2025-05-12 23:00:30,39681,0.0000
62701,150830761,2025-07-03,CK,Simple Business Checking,2259.31,1620.79,CK25,JACQUELINE A. THEIS,GFK ENTERPRISE LLC,ACT,...,1315.220000,7580.23,0,0,0,0,,NaT,64232,0.0000
62721,150954743,2025-07-03,TD,3 Month Business CD,54094.57,54094.57,CD07,MARK A. BORKMAN,"CRAMIK ENTERPRISES, INC DBA WARWICK HANGER",ACT,...,52528.402500,51128.42,0,0,0,0,230749.0,2023-11-01 14:38:34,62809,0.0395
62734,150703140,2025-07-03,CK,Business Elite Money Market,51728.41,51728.41,CK30,KAITLYN M. SILVA,TWIN BOYS LLC,ACT,...,51088.284167,53923.12,0,0,0,0,130907.0,2022-01-28 22:17:10,61437,0.0095


In [3]:

# %%
# Locate and read the single XAA workbook expected in ./assets.
ASSETS_PATH = Path('./assets')

# Ignore dotfiles (e.g. .gitkeep) and Excel's "~$" owner/lock files, which sit
# next to a workbook while it is open and would otherwise break the count below.
files = sorted(
    f for f in ASSETS_PATH.iterdir()
    if f.is_file() and not f.name.startswith(('.', '~$'))
)

assert len(files) == 1, f"Expected exactly 1 file in {ASSETS_PATH}, found {len(files)}."

file = files[0]
# Compare the extension case-insensitively so "REPORT.XLSX" is accepted too,
# and name the offending file in the message.
assert file.suffix.lower() == '.xlsx', f"Expected an excel file (.xlsx), got {file.name!r}"

xaa_data = pd.read_excel(file)

In [4]:
# Print the column names, non-null counts and dtypes of the raw XAA extract.
xaa_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 110846 entries, 0 to 110845
Data columns (total 11 columns):
 #   Column                                                              Non-Null Count   Dtype         
---  ------                                                              --------------   -----         
 0   Cycle End Date                                                      110846 non-null  datetime64[ns]
 1   Debit Account Number                                                110846 non-null  int64         
 2   Analysis Account Type                                               110846 non-null  int64         
 3   Account Type Name                                                   110846 non-null  object        
 4   Account Name                                                        110846 non-null  object        
 5   Analyzed Charges (Pre-ECR)                                          110846 non-null  float64       
 6   Combined Result for Settlement Period (Post-

In [5]:




# # %%
# xaa_data['Analyzed Charges (Pre-ECR)'] = xaa_data['Analyzed Charges (Pre-ECR)'].str.replace('[\$,]','',regex=True)
# xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'] = xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'].str.replace('[\$,]','',regex=True)

# %%
# Target dtypes for the XAA columns: the three numeric measures as float and
# the account number as a string.
xaa_schema = dict.fromkeys(
    [
        'Analyzed Charges (Pre-ECR)',
        'Combined Result for Settlement Period (Post-ECR + Fee-Based Total)',
        'Earnings Credit Rate',
    ],
    'float',
)
xaa_schema['Debit Account Number'] = 'str'

xaa_data = cdutils.input_cleansing.enforce_schema(xaa_data, xaa_schema)



In [6]:

from datetime import datetime, timedelta

def create_account_summary_alternative(xaa_data: pd.DataFrame, date_col: str = 'cycle_date') -> pd.DataFrame:
    """
    Summarise XAA rows to one row per 'Debit Account Number'.

    For each account the result contains:

    * ``Latest_Month_*``  - sum of 'Analyzed Charges (Pre-ECR)' and of
      'Combined Result for Settlement Period (Post-ECR + Fee-Based Total)',
      and the mean of 'Earnings Credit Rate', over the rows that carry the
      account's own most recent ``date_col`` value.
    * ``Trailing_12M_*``  - the same aggregates over the rows whose
      ``date_col`` is on or after (latest date in the whole frame - 365 days).
    * ``*_Officer_Name_XAA`` - the first non-null value per account of the
      'Primary/Secondary/Treasury Officer Name' columns.

    Sums over an empty selection are 0 and means over an empty selection are
    NaN. The numeric measures are returned as floats.

    Parameters
    ----------
    xaa_data : pandas.DataFrame
        Must contain 'Debit Account Number', the three measure columns named
        above, the three officer-name columns and ``date_col``. It is not
        modified.
    date_col : str, default 'cycle_date'
        Name of the column holding the cycle date; parsed with
        ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        One row per account, ordered by account number, with the columns
        listed in ``column_order`` below.
    """
    key = 'Debit Account Number'
    charges_col = 'Analyzed Charges (Pre-ECR)'
    combined_col = 'Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'
    ecr_col = 'Earnings Credit Rate'
    officer_names = {
        'Primary Officer Name': 'Primary_Officer_Name_XAA',
        'Secondary Officer Name': 'Secondary_Officer_Name_XAA',
        'Treasury Officer Name': 'Treasury_Officer_Name_XAA',
    }

    accounts = xaa_data[key]
    # Parsing yields a new Series, so the caller's frame is left untouched
    # without copying it wholesale.
    dates = pd.to_datetime(xaa_data[date_col])

    # The window is anchored on the newest date in the entire frame, not per account.
    # NOTE(review): with month-end cycle dates, ">= max - 365 days" can cover 13
    # cycles (e.g. 2024-06-30 through 2025-06-30) - confirm this is intended.
    cutoff_date = dates.max() - timedelta(days=365)
    is_trailing_12m = dates >= cutoff_date
    # A row is "latest" when its date equals its own account's newest date.
    is_latest_month = dates.eq(dates.groupby(accounts).transform('max'))

    def _masked_agg(column, mask, how):
        # Blank out rows outside the mask, then aggregate per account. NaN is
        # skipped by sum/mean, so this matches filtering the rows first while
        # staying vectorized (no per-group Python callbacks).
        return xaa_data[column].where(mask).groupby(accounts).agg(how)

    # Build every measure directly under its final name instead of renaming
    # aggregated columns by position.
    measures = pd.concat(
        {
            'Latest_Month_Analyzed_Charges': _masked_agg(charges_col, is_latest_month, 'sum'),
            'Latest_Month_Combined_Result': _masked_agg(combined_col, is_latest_month, 'sum'),
            'Trailing_12M_Analyzed_Charges': _masked_agg(charges_col, is_trailing_12m, 'sum'),
            'Trailing_12M_Combined_Result': _masked_agg(combined_col, is_trailing_12m, 'sum'),
            'Latest_Month_ECR': _masked_agg(ecr_col, is_latest_month, 'mean'),
            'Trailing_12M_Avg_ECR': _masked_agg(ecr_col, is_trailing_12m, 'mean'),
        },
        axis=1,
    )

    officers = (
        xaa_data
        .groupby(key)[list(officer_names)]
        .first()
        .rename(columns=officer_names)
    )

    summary = measures.join(officers).reset_index()

    column_order = [
        'Debit Account Number',
        'Latest_Month_Analyzed_Charges',
        'Latest_Month_Combined_Result',
        'Trailing_12M_Analyzed_Charges',
        'Trailing_12M_Combined_Result',
        'Latest_Month_ECR',
        'Trailing_12M_Avg_ECR',
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA',
    ]
    return summary[column_order]


In [7]:
# Collapse the XAA rows to one summary row per account, using 'Cycle End Date'
# as the date column for the latest-month and trailing-window figures.
summarized_xaa = create_account_summary_alternative(xaa_data, date_col='Cycle End Date')

In [None]:
# All three officer-name columns of the summary are coerced to string.
summarized_xaa_schema = {
    column: 'str'
    for column in (
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA',
    )
}
summarized_xaa = cdutils.input_cleansing.enforce_schema(summarized_xaa, summarized_xaa_schema)

In [None]:

# %%
# Give the XAA key column the same name as the deposit data's account column.
summarized_xaa = summarized_xaa.rename(columns={'Debit Account Number': 'acctnbr'}).copy()

# Exactly one summary row per account is required; duplicates would multiply
# deposit rows in the left join below.
assert summarized_xaa['acctnbr'].is_unique, "Duplicates"

# %%
# Left join: every deposit account is kept, XAA figures are attached where present.
# NOTE(review): 'acctnbr' must have the same dtype on both sides - confirm.
merged_data = data.merge(summarized_xaa, how='left', on='acctnbr')

In [10]:

# Numeric XAA columns in which missing values (e.g. deposit accounts with no
# XAA match after the left join) are replaced with 0.
fill_na_column_list = [
    'Latest_Month_Analyzed_Charges',
    'Latest_Month_Combined_Result',
    'Trailing_12M_Analyzed_Charges',
    'Trailing_12M_Combined_Result',
    'Latest_Month_ECR',
    'Trailing_12M_Avg_ECR',
]
merged_data[fill_na_column_list] = merged_data[fill_na_column_list].fillna(0)

In [11]:



# Largest note balances first
merged_data = merged_data.sort_values('notebal', ascending=False)

In [12]:
# Run the project's main transformation pipeline on the merged, sorted data.
# The result is written to the 'Sheet1' tab of the output workbook later on.
formatted_data = src.core_transform.main_pipeline(merged_data)


In [None]:



# %%
# Output to excel: the raw merged data and the formatted data go to separate
# sheets of one workbook, which is then formatted in place.
#
# BASE_PATH was used here without ever being assigned (its only assignments in
# this notebook are commented out), so a fresh-kernel run raised NameError.
# '.' is the non-production location used by the commented-out main().
BASE_PATH = Path('.')
OUTPUT_PATH = BASE_PATH / Path('./output/business_deposits_concentration_with_xaa.xlsx')

# Create the output folder if needed so the writer does not fail on a clean checkout.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

with pd.ExcelWriter(OUTPUT_PATH, engine="openpyxl") as writer:
    merged_data.to_excel(writer, sheet_name='Unformatted', index=False)
    formatted_data.to_excel(writer, sheet_name='Sheet1', index=False)

# Format excel
src.output_to_excel_multiple_sheets.format_excel_file(OUTPUT_PATH)

# if __name__ == '__main__':
# print(f"Starting [{__version__}]")
# # main(production_flag=True)
# main()
# print("Complete!")




