In [1]:
# %%
# %%
"""
Main Entry Point
"""
from pathlib import Path

import pandas as pd # type: ignore

import cdutils.pkey_sqlite # type: ignore
import cdutils.filtering # type: ignore
import cdutils.input_cleansing # type: ignore
import cdutils.cmo_append # type: ignore
import src.add_fields
import src.core_transform
import src.output_to_excel
from src._version import __version__
import src.output_to_excel_multiple_sheets

# def main(production_flag: bool=False):
#     if production_flag:
#         BASE_PATH = Path(r'\\00-DA1\Home\Share\Line of Business_Shared Services')
#         assert "prod" in __version__, (f"Cannot run in production mode without 'prod' in the __version__")
#     else:
#         BASE_PATH = Path('.')

   



# %%
# Staging workbook written by the daily deposit update; the dev section of the documentation has more detail.
INPUT_PATH = Path(
    r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx"
)

# Read the workbook, then attach the portfolio key followed by the note interest rate.
data = src.add_fields.add_noteintrate(
    cdutils.pkey_sqlite.add_pkey(
        pd.read_excel(INPUT_PATH)
    )
)


# Custom list of minors (Business Deposits)
# Each entry is a minor account-type code; the trailing comment is its product description.
# The list is passed as-is to filter_to_business_deposits below.
# NOTE(review): CD31 and CD35 carry the same description ("1 Year Business CD") - confirm both labels are right.
minors = [
    'CK24', # 1st Business Checking
    'CK12', # Business Checking
    'CK25', # Simple Business Checking
    'CK30', # Business Elite Money Market
    'CK19', # Business Money Market
    'CK22', # Business Premium Plus MoneyMkt
    'CK23', # Premium Business Checking
    'CK40', # Community Assoc Reserve
    'CD67', # Commercial Negotiated Rate
    'CD01', # 1 Month Business CD
    'CD07', # 3 Month Business CD
    'CD17', # 6 Month Business CD
    'CD31', # 1 Year Business CD
    'CD35', # 1 Year Business CD
    'CD37', # 18 Month Business CD
    'CD38', # 2 Year Business CD
    'CD50', # 3 Year Business CD
    'CD53', # 4 Year Business CD
    'CD59', # 5 Year Business CD
    'CD76', # 9 Month Business CD
    'CD84', # 15 Month Business CD
    'CD95', # Business <12 Month Simple CD
    'CD96', # Business >12 Month Simple CD
    'CK28', # Investment Business Checking
    'CK33', # Specialty Business Checking
    'CK34', # ICS Shadow - Business - Demand
    'SV06' # Business Select High Yield
]

# Filter to only business deposit accounts
# (presumably matches these codes against currmiaccttypcd - verify in cdutils.filtering)
data = cdutils.filtering.filter_to_business_deposits(data, minors)


# Attach the CMO information to the filtered deposits
data = cdutils.cmo_append.append_cmo(data)

# Target type for the interest-rate column, applied through the shared schema helper
data_schema = dict(noteintrate=float)
data = cdutils.input_cleansing.enforce_schema(data, data_schema).copy()


# %%
# Drop BCSB internal accounts: owner names containing the bank's own name
# (case-insensitive; rows with a missing owner name are kept)
is_internal_account = data['ownersortname'].str.contains(
    'BRISTOL COUNTY SAVINGS', case=False, na=False
)
data = data[~is_internal_account].copy()

# %%
data

# %%

   





Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
29,150735127,2025-07-08,CK,Simple Business Checking,1456.00,1456.00,CK25,JACQUELINE A. THEIS,"THE NEWPORT INVESTMENT HOLDINGS, LLC",ACT,...,13755.951667,154562.67,0,0,0,0,,NaT,66561,0.0000
60,151030469,2025-07-08,CK,Simple Business Checking,1509.54,644.11,CK25,MICHAEL A. HEY,"HAIR POWER, INC.",ACT,...,313.053333,1322.34,6,14,0,5,285587.0,2024-04-01 21:22:14,78401,0.0000
61,150896995,2025-07-08,CK,Business Elite Money Market,301.17,301.17,CK30,MARLENE C. LIRA,LEMIEUX PLUMBING LLC,ACT,...,325.275000,133.49,0,0,0,0,,NaT,73466,0.0095
68,150820952,2025-07-08,CK,Simple Business Checking,27532.96,27191.39,CK25,FRANK P. WILHELM,GORMAN'S WELDING INC,ACT,...,15412.901667,9667.07,4,2,0,0,127947.0,2022-11-21 22:38:07,33291,0.0000
76,27038561,2025-07-08,CK,Simple Business Checking,1437.97,2401.19,CK25,FRANK P. WILHELM,SHEAR ARTISTRY INC,ACT,...,2287.081667,4037.46,0,0,0,0,189113.0,2020-02-26 23:28:13,39341,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62661,27031282,2025-07-08,CK,Business Checking,2736.28,2933.21,CK12,JACQUELINE A. THEIS,ARTHURS AUTO JUNKYARD,ACT,...,3811.878333,3183.68,0,2,0,2,182588.0,2020-02-26 23:28:13,39299,0.0000
62662,27044459,2025-07-08,CK,Business Checking,1186.96,49309.53,CK12,DAMON T. ARPIN,VISUAL CREATIONS INC,ACT,...,57201.435833,54160.64,0,19,0,0,184654.0,2020-02-26 23:28:03,3735,0.0000
62678,150322776,2025-07-08,CK,Simple Business Checking,1182.75,1182.75,CK25,THOMAS D. KELLY,STARLIGHT DEVELOPMENT LLC,ACT,...,41767.530000,232504.98,0,0,0,0,185687.0,2021-09-14 08:28:08,3926,0.0000
62697,151060896,2025-07-08,CK,Community Assoc Reserve,209564.39,209564.39,CK40,HOWARD HIMMEL,HOPEWELL FARMS CONDOMINIUM I ASSOCIATION,ACT,...,181029.126667,0.00,0,0,0,0,289361.0,2024-07-15 21:22:14,79448,0.0200


In [2]:
# %%
# Folder that must contain exactly one file: the XAA export in CSV form
ASSETS_PATH = Path('./assets')

files = [f for f in ASSETS_PATH.iterdir() if f.is_file()]

assert len(files) == 1, f"Expected exactly 1 file in {ASSETS_PATH}, found {len(files)}."

file = files[0]
# The file is loaded with read_csv, so the check and its message must both refer to CSV
# (the message previously claimed an excel file was expected). The comparison is
# case-insensitive so an export named *.CSV is accepted as well.
assert file.suffix.lower() == '.csv', f"Expected a .csv file in {ASSETS_PATH}, found '{file.name}'."

# header=3: the column names sit on the fourth line of the export; the lines above it are skipped
xaa_data = pd.read_csv(file, header=3)

# %%
xaa_data.info()

#

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112188 entries, 0 to 112187
Data columns (total 10 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   Cycle End Date                         112188 non-null  object 
 1   Debit Account Number                   112188 non-null  int64  
 2   Analysis Account Type                  112188 non-null  int64  
 3   Account Name                           112188 non-null  object 
 4   Analyzed Charges                       112188 non-null  float64
 5   Combined Result for Settlement Period  112188 non-null  float64
 6   Earnings Credit Rate                   112188 non-null  float64
 7   Primary Officer Name                   91257 non-null   object 
 8   Secondary Officer Name                 0 non-null       float64
 9   Treasury Officer Name                  60596 non-null   object 
dtypes: float64(4), int64(2), object(4)
memory usage: 8.6+ MB

In [None]:




# # %%
# xaa_data['Analyzed Charges (Pre-ECR)'] = xaa_data['Analyzed Charges (Pre-ECR)'].str.replace('[\$,]','',regex=True)
# xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'] = xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'].str.replace('[\$,]','',regex=True)

# %%
# Target types handed to enforce_schema: the three numeric measures as 'float',
# the account number as 'str'
xaa_schema = {
    **dict.fromkeys(
        [
            'Analyzed Charges',
            'Combined Result for Settlement Period',
            'Earnings Credit Rate',
        ],
        'float',
    ),
    'Debit Account Number': 'str',
}
xaa_data = cdutils.input_cleansing.enforce_schema(xaa_data, xaa_schema)



# %%

from datetime import datetime, timedelta

def create_account_summary_alternative(xaa_data, date_col='cycle_date'):
    """Summarize account-analysis rows into one row per 'Debit Account Number'.

    For every account the summary holds:

    * "latest month" figures - taken from the rows that fall on that account's
      own most recent value of ``date_col``;
    * "trailing 12 month" figures - taken from the rows whose ``date_col`` is
      on or after the cutoff, where the cutoff is the maximum ``date_col`` in
      the whole frame minus 365 days;
    * the first non-null primary, secondary and treasury officer name.

    'Analyzed Charges' and 'Combined Result for Settlement Period' are summed,
    'Earnings Credit Rate' is averaged. An account with no row inside a window
    gets 0 for the sums and NaN for the average.

    Parameters
    ----------
    xaa_data : pandas.DataFrame
        Must contain 'Debit Account Number', 'Analyzed Charges',
        'Combined Result for Settlement Period', 'Earnings Credit Rate',
        'Primary Officer Name', 'Secondary Officer Name',
        'Treasury Officer Name' and the column named by ``date_col``.
        The frame is not modified.
    date_col : str, default 'cycle_date'
        Name of the cycle date column; it is parsed with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        One row per account, sorted by account number, with the columns
        'Debit Account Number', 'Latest_Month_Analyzed_Charges',
        'Latest_Month_Combined_Result', 'Trailing_12M_Analyzed_Charges',
        'Trailing_12M_Combined_Result', 'Latest_Month_ECR',
        'Trailing_12M_Avg_ECR', 'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA', 'Treasury_Officer_Name_XAA'.
    """
    account_col = 'Debit Account Number'

    # Work on a private copy with a fresh RangeIndex. The previous version looked
    # flags up by index label inside per-group lambdas, which misaligned (or raised)
    # whenever the caller's index contained repeated labels.
    frame = xaa_data.reset_index(drop=True)
    frame[date_col] = pd.to_datetime(frame[date_col])

    # Trailing window is anchored on the newest date in the whole data set
    cutoff_date = frame[date_col].max() - pd.Timedelta(days=365)
    is_trailing_12m = frame[date_col].ge(cutoff_date)

    # A row is in the "latest month" when it carries its account's newest date
    # (same rows a descending dense rank of 1 would select)
    latest_cycle = frame.groupby(account_col)[date_col].transform('max')
    is_latest_month = frame[date_col].eq(latest_cycle)

    # Blank out values outside each window once, so plain built-in aggregations
    # can be used instead of one Python-level lookup per group and per metric
    masked = pd.DataFrame({
        account_col: frame[account_col],
        'Latest_Month_Analyzed_Charges': frame['Analyzed Charges'].where(is_latest_month),
        'Latest_Month_Combined_Result': frame['Combined Result for Settlement Period'].where(is_latest_month),
        'Trailing_12M_Analyzed_Charges': frame['Analyzed Charges'].where(is_trailing_12m),
        'Trailing_12M_Combined_Result': frame['Combined Result for Settlement Period'].where(is_trailing_12m),
        'Latest_Month_ECR': frame['Earnings Credit Rate'].where(is_latest_month),
        'Trailing_12M_Avg_ECR': frame['Earnings Credit Rate'].where(is_trailing_12m),
        'Primary_Officer_Name_XAA': frame['Primary Officer Name'],
        'Secondary_Officer_Name_XAA': frame['Secondary Officer Name'],
        'Treasury_Officer_Name_XAA': frame['Treasury Officer Name'],
    })

    aggregations = {
        'Latest_Month_Analyzed_Charges': 'sum',
        'Latest_Month_Combined_Result': 'sum',
        'Trailing_12M_Analyzed_Charges': 'sum',
        'Trailing_12M_Combined_Result': 'sum',
        'Latest_Month_ECR': 'mean',
        'Trailing_12M_Avg_ECR': 'mean',
        'Primary_Officer_Name_XAA': 'first',
        'Secondary_Officer_Name_XAA': 'first',
        'Treasury_Officer_Name_XAA': 'first',
    }
    summary = masked.groupby(account_col).agg(aggregations).reset_index()

    # Fixed output column order expected by downstream cells
    column_order = [
        'Debit Account Number',
        'Latest_Month_Analyzed_Charges',
        'Latest_Month_Combined_Result',
        'Trailing_12M_Analyzed_Charges',
        'Trailing_12M_Combined_Result',
        'Latest_Month_ECR',
        'Trailing_12M_Avg_ECR',
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA'
    ]
    return summary[column_order]


# %%
summarized_xaa = create_account_summary_alternative(xaa_data, date_col='Cycle End Date')

# %%
# All three officer-name columns are given the target type 'str' via the shared schema helper
summarized_xaa_schema = dict.fromkeys(
    [
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA',
    ],
    'str',
)
summarized_xaa = cdutils.input_cleansing.enforce_schema(summarized_xaa, summarized_xaa_schema)

# %%

# %%
# Rename the account key to 'acctnbr', the column the later merge joins on
account_key_rename = {'Debit Account Number': 'acctnbr'}
summarized_xaa = summarized_xaa.rename(columns=account_key_rename).copy()

# One summary row per account is required before joining
assert summarized_xaa['acctnbr'].is_unique, "Duplicates"

# %%
# Left join: every deposit account is kept, XAA summary columns are attached where an account matches
merged_data = data.merge(summarized_xaa, on='acctnbr', how='left')

# %%

fill_na_column_list = [
    'Latest_Month_Analyzed_Charges',
    'Latest_Month_Combined_Result',
    'Trailing_12M_Analyzed_Charges',
    'Trailing_12M_Combined_Result',
    'Latest_Month_ECR',
    'Trailing_12M_Avg_ECR',
]
# Missing values in the numeric XAA columns become 0; the officer-name columns are left alone
merged_data = merged_data.fillna(dict.fromkeys(fill_na_column_list, 0))

# %%



# Largest balances first
merged_data = merged_data.sort_values(by='notebal', ascending=False)

# %%
formatted_data = src.core_transform.main_pipeline(merged_data)


# %%



# %%
# Output to excel (raw data)
# BASE_PATH was used below without ever being assigned (its only definitions were
# commented out), so this cell raised NameError on a fresh kernel. Default to the
# project root, matching the non-production branch of the commented-out main().
BASE_PATH = Path('.')
OUTPUT_PATH = BASE_PATH / Path('./output/business_deposits_concentration_with_xaa.xlsx')
with pd.ExcelWriter(OUTPUT_PATH, engine="openpyxl") as writer:
    # 'Unformatted' holds the merged data as-is; 'Sheet1' holds the output of main_pipeline
    merged_data.to_excel(writer, sheet_name='Unformatted', index=False)
    formatted_data.to_excel(writer, sheet_name='Sheet1', index=False)

# Format excel
src.output_to_excel_multiple_sheets.format_excel_file(OUTPUT_PATH)

if __name__ == '__main__':
    # The pipeline runs at cell level above; main() is not defined anywhere in this
    # file (its definition is commented out), so calling it raised NameError.
    # By this point the work is already done, hence "Finished" rather than "Starting".
    print(f"Finished [{__version__}]")
    print("Complete!")




In [4]:
# Show the formatted frame produced by main_pipeline (the notebook renders a truncated preview)
formatted_data

Unnamed: 0,portfolio_key,acctnbr,ownersortname,product,mjaccttypcd,currmiaccttypcd,acctofficer,notebal,noteintrate,curracctstatcd,...,Cash Management Officer,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA
0,3843,151199025,CHAVES HOLDINGS INC,Business Select High Yield,SAV,SV06,RICHARD J. CLARK,32166763.22,0.033,ACT,...,,0.0,0.0,0.0,0.0,0.0,0.0,,,
1,3843,151146406,RFC MFC 115 KINGMAN STREET LLC,Simple Business Checking,CK,CK25,JEFFREY M. VIALL,1689056.03,0.0,ACT,...,,0.0,0.0,50.53,-41.47,0.0,0.3,JEFFREY M. VIALL,,
2,3843,27052040,BAY STATE SEWAGE DISPOSAL INC,Business Checking,CK,CK12,JEFFREY M. VIALL,416467.91,0.0,ACT,...,,426.18,-110.18,851.57,-183.79,0.85,0.85,JEFFREY M. VIALL,,STEPHEN SHERMAN
3,3843,27019306,BAY STATE SEWAGE DISPOSAL INC,Business Checking,CK,CK12,JEFFREY M. VIALL,174343.9,0.0,ACT,...,,0.0,0.0,0.0,0.0,0.0,0.0,,,
4,3843,151146373,RFC MFC 105 KINGMAN STREET LLC,Simple Business Checking,CK,CK25,JEFFREY M. VIALL,50488.43,0.0,ACT,...,,0.0,0.0,38.33,-25.02,0.0,0.3,JEFFREY M. VIALL,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13125,,,,,,,,,,,...,,,,,,,,,,
13126,39664,29233828,ANAWAN PHARMACY LLC,Simple Business Checking,CK,CK25,FRANK P. WILHELM,27.15,0.0,ACT,...,,0.0,-8.0,0.0,-48.0,0.0,0.0,FRANK P. WILHELM,,STEPHEN SHERMAN
13127,39664,27102483,ANAWAN PHARMACY LLC,Simple Business Checking,CK,CK25,FRANK P. WILHELM,-729.61,0.0,ACT,...,,0.0,-8.0,500.0,-516.0,0.0,0.0,FRANK P. WILHELM,,STEPHEN SHERMAN
13128,39664,,ANAWAN PHARMACY LLC,,,,FRANK P. WILHELM,-702.46,0.0,,...,,0.0,-16.0,500.0,-564.0,0.0,0.0,,,


In [7]:
# Column names, non-null counts and dtypes of the formatted frame
formatted_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13130 entries, 0 to 13129
Data columns (total 28 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   portfolio_key                  13130 non-null  object
 1   acctnbr                        13130 non-null  object
 2   ownersortname                  13130 non-null  object
 3   product                        13130 non-null  object
 4   mjaccttypcd                    13130 non-null  object
 5   currmiaccttypcd                13130 non-null  object
 6   acctofficer                    13126 non-null  object
 7   notebal                        13130 non-null  object
 8   noteintrate                    13130 non-null  object
 9   curracctstatcd                 13130 non-null  object
 10  contractdate                   13129 non-null  object
 11  3Mo_AvgBal                     13130 non-null  object
 12  TTM_AvgBal                     13130 non-null  object
 13  Y

In [17]:
# Keep the rows that have a portfolio key but a blank account number
# (presumably the per-portfolio subtotal rows added by main_pipeline - confirm)
has_portfolio_key = ~(formatted_data['portfolio_key'] == "")
has_blank_acctnbr = formatted_data['acctnbr'] == ""
summary_data = formatted_data[has_portfolio_key & has_blank_acctnbr].copy()

Unnamed: 0,portfolio_key,acctnbr,ownersortname,product,mjaccttypcd,currmiaccttypcd,acctofficer,notebal,noteintrate,curracctstatcd,...,Cash Management Officer,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA
11,3843,,CHAVES HOLDINGS INC,,,,JEFFREY M. VIALL,34567359.94,0.052,,...,,426.18,-110.18,940.43,-258.28,0.85,1.45,,,
16,3544,,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",,,,KEVIN M. MCCARTHY,24869195.85,0.0465,,...,,426.26,-45.11,1608.98,-172.91,0.5,0.5,,,
30,45089,,NEW ENGLAND TREATMENT ACCESS LLC,,,,GEORGE J. MENDROS,16837422.95,0.022,,...,,4251.48,-1301.77,51199.01,-14966.99,0.5,0.5,,,
37,3960,,"CASE FMS, LLC",,,,AN T. LE,9337988.53,0.0245,,...,,10.0,-3.88,110.0,-63.29,0.5,0.5,,,
44,53183,,"BETA GROUP, INC.",,,,ANDREW J. OMER,9251338.97,0.0291,,...,,0.0,0.0,0.0,0.0,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13115,82905,,5 STAR GRANITE INC,,,,AMMAN A. HAIDRI,-108.33,0.0,,...,,0.0,0.0,20.0,-20.0,0.0,0.0,,,
13118,72847,,FAMILYSTORE.IO LLC,,,,DAVID FERREIRA,-251.4,0.0,,...,,5.0,-13.0,65.0,-73.0,0.0,0.0,,,
13121,64638,,BULLDOG WOODS LLC,,,,ALISSA E. HALL,-254.1,0.0,,...,,0.0,-8.0,0.0,-64.0,0.0,0.0,,,
13124,614,,PMD PROPERTIES LLC,,,,TIFFANY J. CAHILL,-288.27,0.0,,...,,0.0,0.0,0.0,-8.0,0.0,0.0,,,
