In [1]:
import os
import sys
from pathlib import Path

# Navigate to project root (equivalent to cd ..)
project_dir = Path(__file__).parent.parent if '__file__' in globals() else Path.cwd().parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [2]:

"""
Main Entry Point
"""
from pathlib import Path

import pandas as pd # type: ignore

import cdutils.pkey_sqlite # type: ignore
import cdutils.filtering # type: ignore
import cdutils.input_cleansing # type: ignore
import cdutils.cmo_append # type: ignore
import src.add_fields
import src.core_transform
import src.output_to_excel
from src._version import __version__
import src.output_to_excel_multiple_sheets
import cdutils.distribution # type: ignore
from datetime import datetime
from dateutil.relativedelta import relativedelta

# def main(production_flag: bool=False):
#     if production_flag:
#         BASE_PATH = Path(r'\\00-DA1\Home\Share\Line of Business_Shared Services')
#         assert "prod" in __version__, (f"Cannot run in production mode without 'prod' in the __version__")
#     else:
BASE_PATH = Path('.')



# %%
# Get staging data from the daily deposit update. View dev section of documentation for more detail
INPUT_PATH = Path(r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx")
data = pd.read_excel(INPUT_PATH)

# Add portfolio key
data = cdutils.pkey_sqlite.add_pkey(data)

# Add int rate
data = src.add_fields.add_noteintrate(data)



In [3]:
full_data_copy = data.copy()

In [4]:

# Custom list of minors (Business Deposits)
minors = [
    'CK24', # 1st Business Checking
    'CK12', # Business Checking
    'CK25', # Simple Business Checking
    'CK30', # Business Elite Money Market
    'CK19', # Business Money Market
    'CK22', # Business Premium Plus MoneyMkt
    'CK23', # Premium Business Checking
    'CK40', # Community Assoc Reserve
    'CD67', # Commercial Negotiated Rate
    'CD01', # 1 Month Business CD
    'CD07', # 3 Month Business CD
    'CD17', # 6 Month Business CD
    'CD31', # 1 Year Business CD
    'CD35', # 1 Year Business CD
    'CD37', # 18 Month Business CD
    'CD38', # 2 Year Business CD
    'CD50', # 3 Year Business CD
    'CD53', # 4 Year Business CD
    'CD59', # 5 Year Business CD
    'CD76', # 9 Month Business CD
    'CD84', # 15 Month Business CD
    'CD95', # Business <12 Month Simple CD
    'CD96', # Business >12 Month Simple CD
    'CK28', # Investment Business Checking
    'CK33', # Specialty Business Checking
    'CK34', # ICS Shadow - Business - Demand
    'SV06' # Business Select High Yield
]

# Filter to only business deposit accounts
data = cdutils.filtering.filter_to_business_deposits(data, minors)


# Add CMO
data = cdutils.cmo_append.append_cmo(data)


data_schema = {
    'noteintrate': float
}

data = cdutils.input_cleansing.enforce_schema(data, data_schema).copy()




# %%
# Exclude BCSB internal accounts
data = data[~data['ownersortname'].str.contains('BRISTOL COUNTY SAVINGS', case=False, na=False)].copy()

# %%
data

# %%





Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
9,150820952,2025-08-20,CK,Free Business Checking,26113.47,17086.04,CK25,FRANK P. WILHELM,GORMAN'S WELDING INC,ACT,...,16793.979167,8565.16,4,2,0,0,127947.0,2022-11-21 22:38:07,33291,0.0000
15,151030469,2025-08-20,CK,Free Business Checking,214.49,233.26,CK25,MICHAEL A. HEY,"HAIR POWER, INC.",ACT,...,413.902500,615.78,7,16,1,7,285587.0,2024-04-01 21:22:14,78383,0.0000
19,27038561,2025-08-20,CK,Free Business Checking,2900.02,2156.61,CK25,FRANK P. WILHELM,SHEAR ARTISTRY INC,ACT,...,2170.294167,3764.99,0,0,0,0,189113.0,2020-02-26 23:28:13,39341,0.0000
20,27041146,2025-08-20,CK,1st Choice Business Checking,289438.57,299602.67,CK24,GEORGE J. MENDROS,COLLINS SMITH & O'CONNOR LLP,ACT,...,298992.355833,319929.00,0,0,0,0,182088.0,2020-02-26 23:28:03,3698,0.0000
21,27046087,2025-08-20,CK,Business Checking,2771.40,3837.92,CK12,ALISSA E. HALL,NICORB INC,ACT,...,16007.492500,45350.37,0,1,0,0,187368.0,2025-04-07 20:41:55,3620,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62631,151062561,2025-08-20,CK,Business Elite Money Market,1802.01,1172.01,CK30,LAURA A. STACK,"SUMNER AVE, LLC",ACT,...,2467.970000,4504.32,0,0,0,0,,NaT,78845,0.0095
62643,150684184,2025-08-20,CK,Business Money Market,37044.20,37044.20,CK19,JUSTIN A. JEFFREY,DAZ CLEANING CO,ACT,...,28980.158333,20559.02,0,0,0,0,235667.0,2021-12-08 22:12:36,64016,0.0036
62654,151104925,2025-08-20,CK,Free Business Checking,1754.00,1754.00,CK25,MARK A. BORKMAN,15 GARRITY RD LLC,ACT,...,3138.182727,0.00,0,0,0,0,,NaT,60428,0.0000
62694,150952490,2025-08-20,TD,6 Month Business CD,10000.00,10000.00,CD17,JACQUELINE A. THEIS,KIWANIS CLUB OF TAUNTON INC,ACT,...,13836.605833,18622.49,0,0,0,0,186458.0,2023-09-26 22:26:55,4324,0.0390


In [5]:
data

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
9,150820952,2025-08-20,CK,Free Business Checking,26113.47,17086.04,CK25,FRANK P. WILHELM,GORMAN'S WELDING INC,ACT,...,16793.979167,8565.16,4,2,0,0,127947.0,2022-11-21 22:38:07,33291,0.0000
15,151030469,2025-08-20,CK,Free Business Checking,214.49,233.26,CK25,MICHAEL A. HEY,"HAIR POWER, INC.",ACT,...,413.902500,615.78,7,16,1,7,285587.0,2024-04-01 21:22:14,78383,0.0000
19,27038561,2025-08-20,CK,Free Business Checking,2900.02,2156.61,CK25,FRANK P. WILHELM,SHEAR ARTISTRY INC,ACT,...,2170.294167,3764.99,0,0,0,0,189113.0,2020-02-26 23:28:13,39341,0.0000
20,27041146,2025-08-20,CK,1st Choice Business Checking,289438.57,299602.67,CK24,GEORGE J. MENDROS,COLLINS SMITH & O'CONNOR LLP,ACT,...,298992.355833,319929.00,0,0,0,0,182088.0,2020-02-26 23:28:03,3698,0.0000
21,27046087,2025-08-20,CK,Business Checking,2771.40,3837.92,CK12,ALISSA E. HALL,NICORB INC,ACT,...,16007.492500,45350.37,0,1,0,0,187368.0,2025-04-07 20:41:55,3620,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62631,151062561,2025-08-20,CK,Business Elite Money Market,1802.01,1172.01,CK30,LAURA A. STACK,"SUMNER AVE, LLC",ACT,...,2467.970000,4504.32,0,0,0,0,,NaT,78845,0.0095
62643,150684184,2025-08-20,CK,Business Money Market,37044.20,37044.20,CK19,JUSTIN A. JEFFREY,DAZ CLEANING CO,ACT,...,28980.158333,20559.02,0,0,0,0,235667.0,2021-12-08 22:12:36,64016,0.0036
62654,151104925,2025-08-20,CK,Free Business Checking,1754.00,1754.00,CK25,MARK A. BORKMAN,15 GARRITY RD LLC,ACT,...,3138.182727,0.00,0,0,0,0,,NaT,60428,0.0000
62694,150952490,2025-08-20,TD,6 Month Business CD,10000.00,10000.00,CD17,JACQUELINE A. THEIS,KIWANIS CLUB OF TAUNTON INC,ACT,...,13836.605833,18622.49,0,0,0,0,186458.0,2023-09-26 22:26:55,4324,0.0390


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6540 entries, 9 to 62697
Data columns (total 23 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   acctnbr             6540 non-null   object        
 1   effdate             6540 non-null   datetime64[ns]
 2   mjaccttypcd         6540 non-null   object        
 3   product             6540 non-null   object        
 4   notebal             6540 non-null   float64       
 5   notemtdavgbal       6540 non-null   float64       
 6   currmiaccttypcd     6540 non-null   object        
 7   acctofficer         6539 non-null   object        
 8   ownersortname       6540 non-null   object        
 9   curracctstatcd      6540 non-null   object        
 10  contractdate        6539 non-null   datetime64[ns]
 11  ytdavgbal           6540 non-null   float64       
 12  3Mo_AvgBal          6540 non-null   float64       
 13  TTM_AvgBal          6540 non-null   float64       
 

In [7]:




# %%
# %%
ASSETS_PATH = Path('./input')

files = [f for f in ASSETS_PATH.iterdir() if f.is_file()]

assert len(files) == 1, f"Expected exactly 1 file in {ASSETS_PATH}, found {len(files)}."

file = files[0]
assert file.suffix == '.csv', f"Expected an excel file"

xaa_data = pd.read_csv(file)

# %%
# xaa_data.info()

#

# %%

In [8]:
xaa_data

Unnamed: 0,Cycle End Date,Analysis Account Type,Application ID,Account Name,Debit Account Number,Analyzed Charges (Pre-ECR),Earnings Credit Rate,Earnings Credit Amount,Analyzed Result Month-to-Date (Post-ECR),Fee-Based Result Month-to-Date (Hard Charges),Combined Result for Settlement Period (Post-ECR),Primary Officer Name,Secondary Officer Name,Treasury Officer Name
0,6/30/2025,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
1,6/30/2024,12,C,YOUNG MEN'S CHRISTIAN AS,24703748,$25.00,0.0,$0.00,$25.00,$0.00,$25.00,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
2,7/31/2024,12,C,YOUNG MEN'S CHRISTIAN AS,24703748,$25.00,0.0,$0.00,$25.00,$0.00,$25.00,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
3,8/31/2024,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
4,9/30/2024,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62291,7/31/2025,25,D,RBAK REALTY LLC,151182880,$40.00,0.0,$0.00,$40.00,$0.00,$40.00,,,
62292,7/31/2025,25,D,MCC INVESTMENTS LLC,151183086,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,,,
62293,7/31/2025,25,D,WALSHS LANDSCAPING LLC,151183391,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,AMY M. BRIGGS,,
62294,7/31/2025,25,D,HEADSTRONG,151183474,$0.00,0.0,$0.00,$0.00,$0.00,$0.00,RICHARD J. CLARK,,


In [9]:
xaa_data['Debit Account Number'].nunique()

5182

In [10]:




# # %%
# xaa_data['Analyzed Charges (Pre-ECR)'] = xaa_data['Analyzed Charges (Pre-ECR)'].str.replace('[\$,]','',regex=True)
# xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'] = xaa_data['Combined Result for Settlement Period (Post-ECR + Fee-Based Total)'].str.replace('[\$,]','',regex=True)

# Rename to match schema from earlier
xaa_data = xaa_data.rename(columns={
    'Analyzed Charges (Pre-ECR)':'Analyzed Charges',
    'Combined Result for Settlement Period (Post-ECR)':'Combined Result for Settlement Period'
}).copy()


In [11]:
xaa_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62296 entries, 0 to 62295
Data columns (total 14 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Cycle End Date                                 62296 non-null  object 
 1   Analysis Account Type                          62296 non-null  int64  
 2   Application ID                                 62296 non-null  object 
 3   Account Name                                   62296 non-null  object 
 4   Debit Account Number                           62296 non-null  int64  
 5   Analyzed Charges                               62296 non-null  object 
 6   Earnings Credit Rate                           62296 non-null  float64
 7   Earnings Credit Amount                         62296 non-null  object 
 8   Analyzed Result Month-to-Date (Post-ECR)       62296 non-null  object 
 9   Fee-Based Result Month-to-Date (Hard Charges)  622

In [12]:
cols_to_adjust = ['Analyzed Charges','Combined Result for Settlement Period']

for col in cols_to_adjust:
    xaa_data[col] = xaa_data[col].str.replace(r'[$,]','', regex=True).astype(float)

In [13]:
xaa_data

Unnamed: 0,Cycle End Date,Analysis Account Type,Application ID,Account Name,Debit Account Number,Analyzed Charges,Earnings Credit Rate,Earnings Credit Amount,Analyzed Result Month-to-Date (Post-ECR),Fee-Based Result Month-to-Date (Hard Charges),Combined Result for Settlement Period,Primary Officer Name,Secondary Officer Name,Treasury Officer Name
0,6/30/2025,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,0.0,0.0,$0.00,$0.00,$0.00,0.0,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
1,6/30/2024,12,C,YOUNG MEN'S CHRISTIAN AS,24703748,25.0,0.0,$0.00,$25.00,$0.00,25.0,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
2,7/31/2024,12,C,YOUNG MEN'S CHRISTIAN AS,24703748,25.0,0.0,$0.00,$25.00,$0.00,25.0,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
3,8/31/2024,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,0.0,0.0,$0.00,$0.00,$0.00,0.0,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
4,9/30/2024,13,C,YOUNG MEN'S CHRISTIAN AS,24703748,0.0,0.0,$0.00,$0.00,$0.00,0.0,INTERNAL ACCOUNTS DI,,STEPHEN SHERMAN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62291,7/31/2025,25,D,RBAK REALTY LLC,151182880,40.0,0.0,$0.00,$40.00,$0.00,40.0,,,
62292,7/31/2025,25,D,MCC INVESTMENTS LLC,151183086,0.0,0.0,$0.00,$0.00,$0.00,0.0,,,
62293,7/31/2025,25,D,WALSHS LANDSCAPING LLC,151183391,0.0,0.0,$0.00,$0.00,$0.00,0.0,AMY M. BRIGGS,,
62294,7/31/2025,25,D,HEADSTRONG,151183474,0.0,0.0,$0.00,$0.00,$0.00,0.0,RICHARD J. CLARK,,


In [14]:
xaa_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62296 entries, 0 to 62295
Data columns (total 14 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Cycle End Date                                 62296 non-null  object 
 1   Analysis Account Type                          62296 non-null  int64  
 2   Application ID                                 62296 non-null  object 
 3   Account Name                                   62296 non-null  object 
 4   Debit Account Number                           62296 non-null  int64  
 5   Analyzed Charges                               62296 non-null  float64
 6   Earnings Credit Rate                           62296 non-null  float64
 7   Earnings Credit Amount                         62296 non-null  object 
 8   Analyzed Result Month-to-Date (Post-ECR)       62296 non-null  object 
 9   Fee-Based Result Month-to-Date (Hard Charges)  622

In [15]:

# %%
xaa_schema = {
    'Analyzed Charges':'float',
    'Combined Result for Settlement Period':'float',
    'Earnings Credit Rate':'float',
    'Debit Account Number':'str'
}
xaa_data = cdutils.input_cleansing.enforce_schema(xaa_data, xaa_schema)




In [16]:

# %%

from datetime import datetime, timedelta

def create_account_summary_alternative(xaa_data, date_col='cycle_date'):
    # Ensure date column is datetime
    xaa_data = xaa_data.copy()
    xaa_data[date_col] = pd.to_datetime(xaa_data[date_col])
    # Calculate cutoff date for 12 months
    max_date = xaa_data[date_col].max()
    cutoff_date = max_date - timedelta(days=365)
    # Add ranking column to identify latest month per account
    xaa_data['date_rank'] = (xaa_data
                            .groupby('Debit Account Number')[date_col]
                            .rank(method='dense', ascending=False))
    # Create flags for latest month and trailing 12 months
    xaa_data['is_latest_month'] = xaa_data['date_rank'] == 1
    xaa_data['is_trailing_12m'] = xaa_data[date_col] >= cutoff_date
    # Aggregate using conditional sums
    summary = (xaa_data
            .groupby('Debit Account Number')
            .agg({
                # Latest month aggregations
                'Analyzed Charges': [
                    lambda x: x[xaa_data.loc[x.index, 'is_latest_month']].sum(),
                    lambda x: x[xaa_data.loc[x.index, 'is_trailing_12m']].sum(),
                ],
                'Combined Result for Settlement Period': [
                    lambda x: x[xaa_data.loc[x.index, 'is_latest_month']].sum(),
                    lambda x: x[xaa_data.loc[x.index, 'is_trailing_12m']].sum()
                ],
                'Earnings Credit Rate': [
                    lambda x: x[xaa_data.loc[x.index, 'is_latest_month']].mean(),
                    lambda x: x[xaa_data.loc[x.index, 'is_trailing_12m']].mean()
                ],
                'Primary Officer Name': 'first',
                'Secondary Officer Name': 'first',
                'Treasury Officer Name': 'first'
            })
            .reset_index())
    # Flatten column names
    summary.columns = [
        'Debit Account Number',
        'Latest_Month_Analyzed_Charges',
        'Trailing_12M_Analyzed_Charges',
        'Latest_Month_Combined_Result',
        'Trailing_12M_Combined_Result',
        'Latest_Month_ECR',
        'Trailing_12M_Avg_ECR',
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA'
    ]
    # Reorder columns
    column_order = [
        'Debit Account Number',
        'Latest_Month_Analyzed_Charges',
        'Latest_Month_Combined_Result',
        'Trailing_12M_Analyzed_Charges',
        'Trailing_12M_Combined_Result',
        'Latest_Month_ECR',
        'Trailing_12M_Avg_ECR',
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',        
        'Treasury_Officer_Name_XAA'
    ]
    return summary[column_order]


# %%
summarized_xaa = create_account_summary_alternative(xaa_data, date_col='Cycle End Date')

# %%
summarized_xaa_schema = {
    'Primary_Officer_Name_XAA':'str',
    'Secondary_Officer_Name_XAA':'str',        
    'Treasury_Officer_Name_XAA':'str'
}
summarized_xaa = cdutils.input_cleansing.enforce_schema(summarized_xaa, summarized_xaa_schema)

# %%

# %%
summarized_xaa = summarized_xaa.rename(columns={
    'Debit Account Number':'acctnbr',

}).copy()

assert summarized_xaa['acctnbr'].is_unique, "Duplicates"




# %%




In [17]:
data

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
9,150820952,2025-08-20,CK,Free Business Checking,26113.47,17086.04,CK25,FRANK P. WILHELM,GORMAN'S WELDING INC,ACT,...,16793.979167,8565.16,4,2,0,0,127947.0,2022-11-21 22:38:07,33291,0.0000
15,151030469,2025-08-20,CK,Free Business Checking,214.49,233.26,CK25,MICHAEL A. HEY,"HAIR POWER, INC.",ACT,...,413.902500,615.78,7,16,1,7,285587.0,2024-04-01 21:22:14,78383,0.0000
19,27038561,2025-08-20,CK,Free Business Checking,2900.02,2156.61,CK25,FRANK P. WILHELM,SHEAR ARTISTRY INC,ACT,...,2170.294167,3764.99,0,0,0,0,189113.0,2020-02-26 23:28:13,39341,0.0000
20,27041146,2025-08-20,CK,1st Choice Business Checking,289438.57,299602.67,CK24,GEORGE J. MENDROS,COLLINS SMITH & O'CONNOR LLP,ACT,...,298992.355833,319929.00,0,0,0,0,182088.0,2020-02-26 23:28:03,3698,0.0000
21,27046087,2025-08-20,CK,Business Checking,2771.40,3837.92,CK12,ALISSA E. HALL,NICORB INC,ACT,...,16007.492500,45350.37,0,1,0,0,187368.0,2025-04-07 20:41:55,3620,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62631,151062561,2025-08-20,CK,Business Elite Money Market,1802.01,1172.01,CK30,LAURA A. STACK,"SUMNER AVE, LLC",ACT,...,2467.970000,4504.32,0,0,0,0,,NaT,78845,0.0095
62643,150684184,2025-08-20,CK,Business Money Market,37044.20,37044.20,CK19,JUSTIN A. JEFFREY,DAZ CLEANING CO,ACT,...,28980.158333,20559.02,0,0,0,0,235667.0,2021-12-08 22:12:36,64016,0.0036
62654,151104925,2025-08-20,CK,Free Business Checking,1754.00,1754.00,CK25,MARK A. BORKMAN,15 GARRITY RD LLC,ACT,...,3138.182727,0.00,0,0,0,0,,NaT,60428,0.0000
62694,150952490,2025-08-20,TD,6 Month Business CD,10000.00,10000.00,CD17,JACQUELINE A. THEIS,KIWANIS CLUB OF TAUNTON INC,ACT,...,13836.605833,18622.49,0,0,0,0,186458.0,2023-09-26 22:26:55,4324,0.0390


In [18]:
summarized_xaa

Unnamed: 0,acctnbr,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA
0,1000010034,0.0,0.0,0.0,0.0,0.0,0.0,CHRISTINE M. PAREDES,,STEPHEN SHERMAN
1,100038,0.0,0.0,0.0,0.0,0.0,0.0,,,
2,100042,0.0,0.0,0.0,0.0,0.0,0.0,,,
3,100060,0.0,0.0,0.0,0.0,0.0,0.0,,,
4,100102,70.0,35.0,910.0,455.0,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...
5177,9001819,0.0,0.0,0.0,0.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN
5178,9003080,0.0,8.0,0.0,32.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN
5179,919666,0.0,0.0,0.0,0.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN
5180,996290,0.0,0.0,0.0,0.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN


In [19]:



# %%
# %%
merged_data = pd.merge(data, summarized_xaa, on='acctnbr', how='outer', indicator=True)

# %%


In [29]:
merged_data

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA,_merge
0,1000010034,NaT,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,CHRISTINE M. PAREDES,,STEPHEN SHERMAN,right_only
1,100038,NaT,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,right_only
2,100042,2025-08-20,CK,Free Business Checking,7946.81,7642.64,CK25,TIFFANY J. CAHILL,WOOD ITEMS & MORE,ACT,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,both
3,100060,2025-08-20,CK,Free Business Checking,120311.72,248136.73,CK25,TIFFANY J. CAHILL,"LORING FINANCIAL PLANNING & INVESTMENTS, INC.",ACT,...,0.0,0.0,0.0,0.0,0.0,0.0,,,,both
4,100102,2025-08-20,CK,Free Business Checking,45219.29,41908.54,CK25,TIFFANY J. CAHILL,SRU INC,ACT,...,70.0,35.0,910.0,455.0,0.0,0.0,,,,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8096,9003080,NaT,,,,,,,,,...,0.0,8.0,0.0,32.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN,right_only
8097,9003486,2025-08-20,CK,Business Money Market,4910.96,5590.96,CK19,AMY M. BRIGGS,FINCH FARM TRUST,ACT,...,,,,,,,,,,left_only
8098,919666,2025-08-20,CK,Free Business Checking,39607.62,39607.62,CK25,AMY M. BRIGGS,COMMITTEE TO ELECT JOSEPH R PACHECO,ACT,...,0.0,0.0,0.0,0.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN,both
8099,996290,NaT,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN,right_only


In [30]:
xaa_only = merged_data[merged_data['_merge'] == 'right_only'].copy()

In [31]:
xaa_only

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA,_merge
0,1000010034,NaT,,,,,,,,,...,0.0,0.00,0.00,0.00,0.0,0.0,CHRISTINE M. PAREDES,,STEPHEN SHERMAN,right_only
1,100038,NaT,,,,,,,,,...,0.0,0.00,0.00,0.00,0.0,0.0,,,,right_only
7,100170,NaT,,,,,,,,,...,0.0,8.00,0.00,0.00,0.0,,,,,right_only
11,100408,NaT,,,,,,,,,...,190.6,60.38,570.63,138.91,0.5,0.5,,,,right_only
19,101284,NaT,,,,,,,,,...,0.0,0.00,0.00,0.00,0.0,0.0,,,,right_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7968,63179466,NaT,,,,,,,,,...,10.0,10.00,120.00,120.00,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
7980,63327422,NaT,,,,,,,,,...,0.0,0.00,0.00,0.00,0.0,0.0,ANGELEBETH H. FARIA,,STEPHEN SHERMAN,right_only
8096,9003080,NaT,,,,,,,,,...,0.0,8.00,0.00,32.00,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN,right_only
8099,996290,NaT,,,,,,,,,...,0.0,0.00,0.00,0.00,0.0,0.0,AMY M. BRIGGS,,STEPHEN SHERMAN,right_only


In [32]:
full_data_copy

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,notebal,notemtdavgbal,currmiaccttypcd,acctofficer,ownersortname,curracctstatcd,...,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,householdnbr,datelastmaint,portfolio_key,noteintrate
0,26079283,2025-08-20,CK,Basic Checking,2575.46,2750.46,CK02,AMY M. BRIGGS,"ROOS, MARY E.",ACT,...,2722.157500,6755.54,0,0,0,0,164176.0,2020-02-26 23:27:59,35416,0.0000
1,151008193,2025-08-20,TD,3 Month Prime Time CD,69384.74,68708.11,CD13,JUSTIN A. JEFFREY,"HENRIQUES, KAREN M.",ACT,...,67377.936667,65669.49,0,0,0,0,131604.0,2024-02-15 21:14:43,3863,0.0400
2,26120793,2025-08-20,CK,Personal Checking,5268.61,5268.61,CK01,AMY M. BRIGGS,"NORCROSS, DAVID P.",ACT,...,3123.140833,3325.45,0,0,0,0,153141.0,2020-02-26 23:28:12,2802,0.0000
3,26087162,2025-08-20,CK,eChecking (18 & over),52.15,119.31,CK04,JACQUELINE A. THEIS,"FERREIRA, DARIAN TYLER",ACT,...,14.584167,28.85,1,4,0,1,122599.0,2020-02-26 23:27:59,1892,0.0000
4,26087642,2025-08-20,CK,eChecking (18 & over),14.60,14.60,CK04,CHRISTINE M. PAREDES,"CAMARA, DYLAN M.",ACT,...,89.134167,34.68,0,0,0,0,214793.0,2021-02-22 22:26:09,36026,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62698,151085290,2025-08-20,SAV,Select High Yield,32582.63,32505.29,SV08,SUSAN M. FARLEY,"MITCHELL, DIANA D.",ACT,...,36030.786667,0.00,0,0,0,0,231336.0,2024-08-01 21:24:35,62938,0.0305
62699,151138297,2025-08-20,CK,NOW Checking,400.07,400.06,CK05,TIFFANY J. CAHILL,"VIEIRA, ANGELA C.",ACT,...,372.371429,0.00,0,0,0,0,213201.0,2025-01-16 20:44:39,534,0.0003
62700,151090687,2025-08-20,TD,3 Month Prime Time CD,26090.03,25835.60,CD13,FRANK P. WILHELM,"NOELTE, BEVERLY J.",ACT,...,24420.151667,0.00,0,0,0,0,153026.0,2024-08-15 22:35:25,2362,0.0400
62701,151112522,2025-08-20,CK,Prime Time Checking,9933.13,9932.74,CK06,TIFFANY J. CAHILL,"CONNELL QUETTA, SUSAN A.",ACT,...,9303.202000,0.00,0,0,0,0,234485.0,2024-10-21 20:54:38,63718,0.0005


In [33]:
check = pd.merge(xaa_only, full_data_copy, on='acctnbr', how='left')

In [35]:
check

Unnamed: 0,acctnbr,effdate_x,mjaccttypcd_x,product_x,notebal_x,notemtdavgbal_x,currmiaccttypcd_x,acctofficer_x,ownersortname_x,curracctstatcd_x,...,TTM_AvgBal_y,Year Ago Balance_y,TTM_DAYS_OVERDRAWN_y,TTM_NSF_COUNT_y,YTD_DAYS_OVERDRAWN_y,YTD_NSF_COUNT_y,householdnbr_y,datelastmaint_y,portfolio_key_y,noteintrate_y
0,1000010034,NaT,,,,,,,,,...,15738.520833,19209.60,0.0,0.0,0.0,0.0,304713.0,2025-01-27 20:56:26,58888.0,0.0005
1,100038,NaT,,,,,,,,,...,7064.321667,3352.90,0.0,0.0,0.0,0.0,,NaT,83.0,0.0005
2,100170,NaT,,,,,,,,,...,,,,,,,,NaT,,
3,100408,NaT,,,,,,,,,...,,,,,,,,NaT,,
4,101284,NaT,,,,,,,,,...,11882.746667,7526.78,0.0,0.0,0.0,0.0,,NaT,191.0,0.0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1556,63179466,NaT,,,,,,,,,...,721.367500,707.22,0.0,0.0,0.0,0.0,,NaT,33010.0,0.0350
1557,63327422,NaT,,,,,,,,,...,10812.284167,14968.04,0.0,0.0,0.0,0.0,183774.0,2020-02-26 23:28:03,40775.0,0.0005
1558,9003080,NaT,,,,,,,,,...,,,,,,,,NaT,,
1559,996290,NaT,,,,,,,,,...,255.010000,255.01,0.0,0.0,0.0,0.0,187114.0,2020-02-26 23:28:03,1558.0,0.0100


In [None]:


fill_na_column_list = [
    'Latest_Month_Analyzed_Charges',
    'Latest_Month_Combined_Result',
    'Trailing_12M_Analyzed_Charges',
    'Trailing_12M_Combined_Result',
    'Latest_Month_ECR',
    'Trailing_12M_Avg_ECR',
]
for item in fill_na_column_list:
    merged_data[item] = merged_data[item].fillna(0)




In [None]:
# Sort descending order of notebal
merged_data = merged_data.sort_values(by='notebal', ascending=False)

# %%
# merged_data.info()

# %%
merged_data



# %%
# This part doesn't work. Look at noteinrate, gets weird

# %%
formatted_data = src.core_transform.main_pipeline(merged_data)

# %%
formatted_data

# %%
formatted_data = formatted_data.rename(columns={
    'portfolio_key':'Portfolio Key',
    'product':'Product',
    '3Mo_AvgBal':'3Mo Avg Bal',
    'TTM_AvgBal':'TTM Avg Bal',
    'TTM_DAYS_OVERDRAWN':'TTM Days Overdrawn',
    'TTM_NSF_COUNT':'TTM NSF Count'
}).copy()


# %%
# Create summary sheet

summary_data = formatted_data[~(formatted_data['Portfolio Key'] == "") & (formatted_data['Acct No.'] == "")].copy()
summary_data = summary_data[[
    'Portfolio Key',
    'Borrower Name',
    'Account Officer',
    'Cash Management Officer',
    'Current Balance',
    'Interest Rate',
    '3Mo Avg Bal',
    'TTM Avg Bal',
    'Year Ago Balance',
    'TTM Days Overdrawn',
    'TTM NSF Count',
    'Current Mo Analyzed Fees (Pre-ECR)',
    'Current Mo Net Analyzed Fees (Post-ECR)',
    'TTM Analyzed Fees (Pre-ECR)',
    'TTM Net Analyzed Fees (Post-ECR)',
    'Current ECR'
]].copy()


# %%
# %%
# Output to excel (raw data)
# BASE_PATH = Path('.')
OUTPUT_PATH = BASE_PATH / Path('./output/business_deposits_concentration_with_xaa.xlsx')
with pd.ExcelWriter(OUTPUT_PATH, engine="openpyxl") as writer:
    formatted_data.to_excel(writer, sheet_name='Relationship Detail', index=False)
    summary_data.to_excel(writer, sheet_name='Relationship Summary', index=False)
    merged_data.to_excel(writer, sheet_name='Unformatted', index=False)


# Format excel
src.output_to_excel_multiple_sheets.format_excel_file(OUTPUT_PATH)


# Usage
# # Distribution
recipients = [
    # "chad.doorley@bcsbmail.com"
    "Hasan.Ali@bcsbmail.com",
    "steve.sherman@bcsbmail.com",
    "Michael.Patacao@bcsbmail.com",
    "Jeffrey.Pagliuca@bcsbmail.com",
    "Timothy.Chaves@bcsbmail.com",
    "Isaura.Tavares@bcsbmail.com",
    "Taylor.Tierney@bcsbmail.com",
    "Anderson.Lovos@bcsbmail.com",

]
bcc_recipients = [
    "chad.doorley@bcsbmail.com",
    "businessintelligence@bcsbmail.com"
]

prev_month = datetime.now() - relativedelta(months=1)
result = prev_month.strftime("%B %Y")

subject = f"Business Deposits + XAA Concentration Report - {result}" 
body = "Hi all, \n\nAttached is the Business Deposits + XAA Concentration Report through the most recent month end. If you have any questions, please reach out to BusinessIntelligence@bcsbmail.com\n\n"
attachment_paths = [OUTPUT_PATH]

# cdutils.distribution.email_out(
#     recipients = recipients, 
#     bcc_recipients = bcc_recipients, 
#     subject = subject, 
#     body = body, 
#     attachment_paths = attachment_paths
#     )

# if __name__ == '__main__':
# print(f"Starting [{__version__}]")
# # main(production_flag=True)
# main()
# print("Complete!")



# %%





In [None]:
summarized_xaa