In [25]:

# %%
import os
import sys
from pathlib import Path

# Navigate to project root (equivalent to cd ..)
if '__file__' in globals():
    # Running as a script/module: the project root is two levels above this file.
    project_dir = Path(__file__).parent.parent
else:
    # Notebook kernels do not define __file__, so the working directory is used.
    # Re-running this cell must not climb one more level each time: once the
    # working directory already contains "src", it is treated as the project root.
    # NOTE(review): assumes the notebook's own folder has no "src" subfolder.
    _cwd = Path.cwd()
    project_dir = _cwd if (_cwd / "src").is_dir() else _cwd.parent
os.chdir(project_dir)

# Add src directory to Python path for imports
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

# %%

"""
Main Entry Point
"""
from pathlib import Path

import pandas as pd  # type: ignore

import cdutils.pkey_sqlite  # type: ignore
import cdutils.filtering  # type: ignore
import cdutils.input_cleansing  # type: ignore
import cdutils.cmo_append  # type: ignore
import src.add_fields
import src.core_transform
import src.output_to_excel
from src._version import __version__
import src.output_to_excel_multiple_sheets
import cdutils.distribution  # type: ignore
from datetime import datetime
from dateutil.relativedelta import relativedelta

# # Ensure BASE_PATH for output
# try:
#     BASE_PATH
# except NameError:
#     BASE_PATH = Path('.')
# (BASE_PATH / "output").mkdir(parents=True, exist_ok=True)

# %%

# Staging extract produced by the daily deposit update (see the dev section of the documentation).
INPUT_PATH = Path(r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx")
staging_raw = pd.read_excel(INPUT_PATH)

# Enrich in two steps: portfolio key first, then the note interest rate.
staging_keyed = cdutils.pkey_sqlite.add_pkey(staging_raw)
data = src.add_fields.add_noteintrate(staging_keyed)

# Custom list of minors (Business Deposits)
# Minor account-type codes passed to filter_to_business_deposits below; each
# trailing comment is the product description for that code.
# NOTE(review): CD31 and CD35 carry the same description — confirm both are intended.
minors = [
    'CK24',  # 1st Business Checking
    'CK12',  # Business Checking
    'CK25',  # Simple Business Checking
    'CK30',  # Business Elite Money Market
    'CK19',  # Business Money Market
    'CK22',  # Business Premium Plus MoneyMkt
    'CK23',  # Premium Business Checking
    'CK40',  # Community Assoc Reserve
    'CD67',  # Commercial Negotiated Rate
    'CD01',  # 1 Month Business CD
    'CD07',  # 3 Month Business CD
    'CD17',  # 6 Month Business CD
    'CD31',  # 1 Year Business CD
    'CD35',  # 1 Year Business CD
    'CD37',  # 18 Month Business CD
    'CD38',  # 2 Year Business CD
    'CD50',  # 3 Year Business CD
    'CD53',  # 4 Year Business CD
    'CD59',  # 5 Year Business CD
    'CD76',  # 9 Month Business CD
    'CD84',  # 15 Month Business CD
    'CD95',  # Business <12 Month Simple CD
    'CD96',  # Business >12 Month Simple CD
    'CK28',  # Investment Business Checking
    'CK33',  # Specialty Business Checking
    'CK34',  # ICS Shadow - Business - Demand
    'SV06',  # Business Select High Yield
    'CK13',  # description not recorded here — TODO confirm product name
    'CK15',  # description not recorded here — TODO confirm product name
    'CK41'  # description not recorded here — TODO confirm product name
]

# Keep only the business deposit minors, then attach the cash management officer.
business_only = cdutils.filtering.filter_to_business_deposits(data, minors)
with_cmo = cdutils.cmo_append.append_cmo(business_only)

# The note interest rate must be a float for the downstream calculations.
data_schema = {'noteintrate': float}
typed = cdutils.input_cleansing.enforce_schema(with_cmo, data_schema).copy()

# Drop the bank's own (BCSB internal) accounts; owners with a missing name are kept.
is_internal = typed['ownersortname'].str.contains('BRISTOL COUNTY SAVINGS', case=False, na=False)
data = typed[~is_internal].copy()




In [28]:
# %%

# The XAA export is expected to be the one and only file in ./assets, and it must be a .csv.
ASSETS_PATH = Path('./assets')
files = [entry for entry in ASSETS_PATH.iterdir() if entry.is_file()]
assert len(files) == 1, f"Expected exactly 1 file in {ASSETS_PATH}, found {len(files)}."
file = files[0]
assert file.suffix == '.csv', f"Expected a csv file"

# Read the export, parse the cycle end date, and shorten the two fee column names
# to the labels the rest of the notebook uses.
xaa_data = (
    pd.read_csv(file)
    .assign(**{'Cycle End Date': lambda frame: pd.to_datetime(frame['Cycle End Date'])})
    .rename(columns={
        'Analyzed Charges (Pre-ECR)': 'Analyzed Charges',
        'Combined Result for Settlement Period (Post-ECR)': 'Combined Result for Settlement Period'
    })
)

# Money columns that arrived as text may carry "$" and thousands separators; strip them.
for col in ['Analyzed Charges', 'Combined Result for Settlement Period']:
    if col not in xaa_data.columns or xaa_data[col].dtype != 'O':
        continue
    xaa_data[col] = xaa_data[col].str.replace(r'[$,]', '', regex=True)

# Final dtypes: fee and rate columns as floats, the account number as text.
xaa_schema = {
    'Analyzed Charges': 'float',
    'Combined Result for Settlement Period': 'float',
    'Earnings Credit Rate': 'float',
    'Debit Account Number': 'str'
}
xaa_data = cdutils.input_cleansing.enforce_schema(xaa_data, xaa_schema)

# %%

# ---------- Summarizer that matches your rules ----------
from typing import Optional, Union

def summarize_xaa_for_latest_and_ttm(
    xaa_df: pd.DataFrame,
    *,
    date_col: str = "Cycle End Date",
    account_col: str = "Debit Account Number",
    charges_col: str = "Analyzed Charges",
    result_col: str = "Combined Result for Settlement Period",
    ecr_col: str = "Earnings Credit Rate",
    target_month: Optional[Union[str, pd.Period, pd.Timestamp]] = None,
    today: Optional[pd.Timestamp] = None,
) -> pd.DataFrame:
    """
    Build a one-row-per-account summary of XAA (account analysis) data.

    Two windows are summarized:
      - Latest month: rows whose date equals the target month-end. The target is
        the month-end of 'target_month' when given (e.g. '2025-08' -> 2025-08-31),
        otherwise the most recent month-end on or before 'today'.
      - Trailing 12 months: rows dated within [today - 12 months, today], inclusive.
        This window is anchored on 'today' even when 'target_month' is supplied.

    Args:
        xaa_df: Source rows. Must contain 'date_col', 'account_col' and the columns
            'Primary Officer Name', 'Secondary Officer Name', 'Treasury Officer Name'.
            The input frame is not modified.
        date_col, account_col, charges_col, result_col, ecr_col: Column names.
        target_month: Month to report as "latest" (string, Period or Timestamp).
        today: Reference date; anything accepted by pd.Timestamp. Defaults to the
            current date. The time-of-day component is discarded.

    Returns:
        One row per account with:
          account_col,
          Latest_Month_Analyzed_Charges, Latest_Month_Combined_Result,
          Trailing_12M_Analyzed_Charges, Trailing_12M_Combined_Result,
          Latest_Month_ECR, Trailing_12M_Avg_ECR,
          Primary_Officer_Name_XAA, Secondary_Officer_Name_XAA, Treasury_Officer_Name_XAA
        Charges/results are summed and ECR is averaged per window; accounts with no
        rows in a window get 0.0 for that window's numeric columns.

    Raises:
        KeyError: if a required column (date, account, or officer name) is missing.
    """
    df = xaa_df.copy()
    df[date_col] = pd.to_datetime(df[date_col]).dt.normalize()

    # Always reduce 'today' to a midnight Timestamp. A caller-supplied value with a
    # time component would otherwise never equal the normalized dates in the frame
    # (so a month-end 'today' matched no rows), and a string value had no
    # .is_month_end attribute at all.
    today = pd.Timestamp.today() if today is None else pd.Timestamp(today)
    today = today.normalize()

    # Determine target month-end date
    if target_month is not None:
        if isinstance(target_month, pd.Timestamp):
            target_period = target_month.to_period("M")
        elif isinstance(target_month, pd.Period):
            target_period = target_month.asfreq("M")
        else:
            target_period = pd.Period(str(target_month), freq="M")
        # how="end" yields the last instant of the month; normalize back to midnight.
        target_eom = target_period.to_timestamp(how="end").normalize()
    else:
        # Most recent end-of-month on or before 'today'
        target_eom = today if today.is_month_end else (today - pd.offsets.MonthEnd(1)).normalize()

    # Row masks for the two windows
    target_mask = df[date_col].eq(target_eom)
    ttm_start = (today - pd.DateOffset(months=12)).normalize()
    trailing_mask = (df[date_col] >= ttm_start) & (df[date_col] <= today)

    def _coerce_numeric(s: pd.Series) -> pd.Series:
        """Convert a column to numbers, stripping '$' and ',' from textual values."""
        # Any non-numeric dtype (object, string, ...) is cleaned as text first;
        # values that still cannot be parsed become NaN.
        if not pd.api.types.is_numeric_dtype(s):
            s = s.astype(str).str.replace(r"[$,]", "", regex=True)
        return pd.to_numeric(s, errors="coerce")

    for col in (charges_col, result_col, ecr_col):
        if col in df.columns:
            df[col] = _coerce_numeric(df[col])

    # Officer names per account ("first" takes the first non-null value per group).
    # This frame also defines the full set of accounts in the output.
    base = (df.groupby(account_col, as_index=False)
              .agg(Primary_Officer_Name_XAA=("Primary Officer Name", "first"),
                   Secondary_Officer_Name_XAA=("Secondary Officer Name", "first"),
                   Treasury_Officer_Name_XAA=("Treasury Officer Name", "first")))

    # Target month
    target = (df.loc[target_mask]
                .groupby(account_col, as_index=False)
                .agg(Latest_Month_Analyzed_Charges=(charges_col, "sum"),
                     Latest_Month_Combined_Result=(result_col, "sum"),
                     Latest_Month_ECR=(ecr_col, "mean")))

    # Trailing 12 months
    trailing = (df.loc[trailing_mask]
                  .groupby(account_col, as_index=False)
                  .agg(Trailing_12M_Analyzed_Charges=(charges_col, "sum"),
                       Trailing_12M_Combined_Result=(result_col, "sum"),
                       Trailing_12M_Avg_ECR=(ecr_col, "mean")))

    # Left-join so accounts with no activity in a window are still reported.
    out = (base
           .merge(target, on=account_col, how="left")
           .merge(trailing, on=account_col, how="left"))

    # Accounts absent from a window come back as NaN; report them as zero.
    for c in ["Latest_Month_Analyzed_Charges",
              "Latest_Month_Combined_Result",
              "Trailing_12M_Analyzed_Charges",
              "Trailing_12M_Combined_Result",
              "Latest_Month_ECR",
              "Trailing_12M_Avg_ECR"]:
        if c in out.columns:
            out[c] = out[c].fillna(0.0)

    # Fixed output column order
    cols = [
        account_col,
        "Latest_Month_Analyzed_Charges",
        "Latest_Month_Combined_Result",
        "Trailing_12M_Analyzed_Charges",
        "Trailing_12M_Combined_Result",
        "Latest_Month_ECR",
        "Trailing_12M_Avg_ECR",
        "Primary_Officer_Name_XAA",
        "Secondary_Officer_Name_XAA",
        "Treasury_Officer_Name_XAA",
    ]
    return out[[c for c in cols if c in out.columns]].copy()

# -------- Run the summarizer --------
# target_month pins the "latest month" figures to July 2025 (cycle end 2025-07-31).
# Leave target_month out to fall back to the most recent month-end on or before today.
summarized_xaa = summarize_xaa_for_latest_and_ttm(
    xaa_data,
    date_col='Cycle End Date',
    target_month="2025-07",
)

# The three officer name columns are forced to text.
summarized_xaa_schema = {
    officer_col: 'str'
    for officer_col in (
        'Primary_Officer_Name_XAA',
        'Secondary_Officer_Name_XAA',
        'Treasury_Officer_Name_XAA',
    )
}
summarized_xaa = cdutils.input_cleansing.enforce_schema(summarized_xaa, summarized_xaa_schema)

# Align the key name with the deposit data and verify one row per account,
# so the later left merge cannot duplicate deposit rows.
summarized_xaa = summarized_xaa.rename(columns={'Debit Account Number': 'acctnbr'}).copy()
assert summarized_xaa['acctnbr'].is_unique, "Duplicates in summarized XAA per account."



In [30]:
# Attach the XAA summary to every business deposit account (accounts without XAA rows are kept).
merged_data = data.merge(summarized_xaa, on='acctnbr', how='left')

# Accounts with no XAA match come through as NaN in the fee/ECR columns; show them as zero.
for xaa_col in (
    'Latest_Month_Analyzed_Charges',
    'Latest_Month_Combined_Result',
    'Trailing_12M_Analyzed_Charges',
    'Trailing_12M_Combined_Result',
    'Latest_Month_ECR',
    'Trailing_12M_Avg_ECR',
):
    if xaa_col not in merged_data.columns:
        continue
    merged_data[xaa_col] = merged_data[xaa_col].fillna(0.0)

# Largest balances first (skipped when the balance column is absent).
if 'notebal' in merged_data.columns:
    merged_data = merged_data.sort_values('notebal', ascending=False)

# %%

# Run the core transform, then swap internal column names for report-friendly labels.
friendly_names = {
    'portfolio_key': 'Portfolio Key',
    'product': 'Product',
    '3Mo_AvgBal': '3Mo Avg Bal',
    'TTM_AvgBal': 'TTM Avg Bal',
    'TTM_DAYS_OVERDRAWN': 'TTM Days Overdrawn',
    'TTM_NSF_COUNT': 'TTM NSF Count'
}
formatted_data = src.core_transform.main_pipeline(merged_data).rename(columns=friendly_names).copy()

# Summary sheet: rows that have a portfolio key but a blank account number
# (the per-relationship total rows in the formatted output).
has_portfolio_key = ~formatted_data['Portfolio Key'].eq("")
blank_account = formatted_data['Acct No.'].eq("")
summary_columns = [
    'Portfolio Key',
    'Borrower Name',
    'Account Officer',
    'Cash Management Officer',
    'Current Balance',
    'Interest Rate',
    '3Mo Avg Bal',
    'TTM Avg Bal',
    'Year Ago Balance',
    'TTM Days Overdrawn',
    'TTM NSF Count',
    'Current Mo Analyzed Fees (Pre-ECR)',
    'Current Mo Net Analyzed Fees (Post-ECR)',
    'TTM Analyzed Fees (Pre-ECR)',
    'TTM Net Analyzed Fees (Post-ECR)',
    'Current ECR'
]
summary_data = formatted_data[has_portfolio_key & blank_account].copy()
summary_data = summary_data.loc[:, summary_columns].copy()

# Write the three sheets to one workbook under ./output (created if missing).
OUTPUT_DIR = Path('./output')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_PATH = OUTPUT_DIR / "business_deposits_concentration_with_xaa.xlsx"
sheets = {
    'Relationship Detail': formatted_data,
    'Relationship Summary': summary_data,
    'Unformatted': merged_data,
}
with pd.ExcelWriter(OUTPUT_PATH, engine="openpyxl") as writer:
    for sheet_name, sheet_frame in sheets.items():
        sheet_frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Apply workbook formatting to the saved file.
src.output_to_excel_multiple_sheets.format_excel_file(OUTPUT_PATH)



In [31]:
formatted_data

Unnamed: 0,Portfolio Key,Acct No.,Borrower Name,Product,Account Officer,Cash Management Officer,Current Balance,Interest Rate,Acct Open Date,3Mo Avg Bal,TTM Avg Bal,Year Ago Balance,TTM Days Overdrawn,TTM NSF Count,Current Mo Analyzed Fees (Pre-ECR),Current Mo Net Analyzed Fees (Post-ECR),TTM Analyzed Fees (Pre-ECR),TTM Net Analyzed Fees (Post-ECR),Current ECR
0,3545,60801557,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Investment Business Checking,KEVIN M. MCCARTHY,,26497668.04,0.022,2012-03-05 00:00:00,23828780.653333,18087603.505,11994449.71,0,0,0.0,0.0,0.0,0.0,0.0
1,3545,27063948,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Business Checking,KEVIN M. MCCARTHY,,1000000.0,0.0,2008-01-09 00:00:00,1015735.256667,1027207.276667,1036629.4,0,0,391.23,9.04,817.49,54.15,0.5
2,3545,60076593,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Business Elite Money Market,KEVIN M. MCCARTHY,,820999.0,0.0245,2006-08-08 00:00:00,815914.6,808494.269167,797744.16,0,0,0.0,0.0,0.0,0.0,0.0
3,3545,,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",,KEVIN M. MCCARTHY,,28318667.04,0.021296,,25660430.51,19923305.050833,13828823.27,0,0,391.23,9.04,817.49,54.15,0.5
4,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14849,890,,INDEPENDENT BENEFIT SOLUTIONS LLC,,JUSTIN A. JEFFREY,,-105.0,-0.0,,3789.493333,2983.259167,6127.26,19,17,0.0,0.0,0.0,8.0,0.0
14850,,,,,,,,,,,,,,,,,,,
14851,40774,29157269,"NYE-BUCKLEY, STACIE E.",Free Business Checking,MICHAEL A. HEY,,-915.73,0.0,2015-12-28 00:00:00,1970.1,1965.58,1672.71,51,34,0.0,0.0,0.0,0.0,0.0
14852,40774,,"NYE-BUCKLEY, STACIE E.",,MICHAEL A. HEY,,-915.73,-0.0,,1970.1,1965.58,1672.71,51,34,0.0,0.0,0.0,0.0,0.0


In [32]:
summary_data

Unnamed: 0,Portfolio Key,Borrower Name,Account Officer,Cash Management Officer,Current Balance,Interest Rate,3Mo Avg Bal,TTM Avg Bal,Year Ago Balance,TTM Days Overdrawn,TTM NSF Count,Current Mo Analyzed Fees (Pre-ECR),Current Mo Net Analyzed Fees (Post-ECR),TTM Analyzed Fees (Pre-ECR),TTM Net Analyzed Fees (Post-ECR),Current ECR
3,3545,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",KEVIN M. MCCARTHY,,28318667.04,0.021296,25660430.51,19923305.050833,13828823.27,0,0,391.23,9.04,817.49,54.15,0.5
17,45089,NEW ENGLAND TREATMENT ACCESS LLC,GEORGE J. MENDROS,,17341790.88,0.012016,17446791.75,20181653.4175,25349836.2,0,5,4253.53,1202.18,46872.77,13817.56,0.5
25,53182,"BETA GROUP, INC.",ANDREW J. OMER,,15630031.76,0.019042,14437690.106667,12434915.159167,11755403.69,0,0,0.0,0.0,0.0,0.0,0.0
37,3997,"HUTCHENS HOLDING III, LLC",BRANDON CANNATA,,10825055.81,0.011087,5182560.6,4281522.9575,2841416.7,0,1,464.23,173.63,4212.35,632.97,0.5
48,4982,"STURDY HEALTH FOUNDATION, INC.",INTERNAL ACCOUNTS DIRECTORS,,9866554.3,0.012247,9285735.04,8390581.848333,4089607.68,0,0,39.82,39.82,429.63,429.63,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14840,82800,OIL CAN SALOON LLC,JACQUELINE A. THEIS,,-58.43,-0.0,3564.166667,7991.616667,0.0,15,4,0.0,0.0,0.0,0.0,0.0
14843,59796,"AHAVA FOODS, LLC",PETER ST JEAN,,-58.65,-0.0,4057.64,7882.325,9859.13,1,20,0.0,0.0,0.0,0.0,0.0
14846,49979,"TOTAL COMFORT HEATING & COOLING, INC",CHRISTINE M. PAREDES,,-88.74,-0.0,822.71,9194.6925,15529.2,27,59,0.0,0.0,0.0,8.0,0.0
14849,890,INDEPENDENT BENEFIT SOLUTIONS LLC,JUSTIN A. JEFFREY,,-105.0,-0.0,3789.493333,2983.259167,6127.26,19,17,0.0,0.0,0.0,8.0,0.0


In [33]:
# Total current-month post-ECR analyzed fees across all rows of the relationship summary.
final_report_post_ecr = summary_data['Current Mo Net Analyzed Fees (Post-ECR)'].sum()
final_report_post_ecr

50181.31000000005

In [34]:
# Add a manually entered amount to the report total and print it as currency.
# NOTE(review): 563.41 is hard-coded with no provenance in this notebook — presumably the
# post-ECR total for XAA accounts that did not match the business deposit data.
# TODO confirm the source and compute it from the data instead.
xaa_only_total = 563.41
print(f"${final_report_post_ecr + xaa_only_total:,.2f}")


$50,744.72


In [35]:
# Outer-join deposits and the XAA summary; `_merge` records which side each row came from.
merged_data2 = data.merge(summarized_xaa, on='acctnbr', how='outer', indicator=True)
# Keep only the rows whose `_merge` label mentions "right", i.e. accounts found in
# the XAA summary but not in the business deposit data.
from_xaa_only = merged_data2['_merge'].astype(str).str.contains("right", regex=False, na=False, case=False)
merged_data2 = merged_data2[from_xaa_only]

In [37]:
merged_data2['acctnbr'].nunique()

739

In [None]:
# Get more data about missing XAA items

In [None]:
# Re-read the staging workbook into a separate frame. Unlike `data`, this copy has not
# been filtered to business deposit minors or enriched. The path is the same one
# assigned to INPUT_PATH for the initial load.
INPUT_PATH = Path(r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx")
full_data = pd.read_excel(INPUT_PATH)

In [None]:
# Store account numbers as text. Presumably this matches the key dtype on the XAA side
# (its account number is schema'd as 'str') so a merge on acctnbr lines up — TODO confirm.
full_data['acctnbr'] = full_data['acctnbr'].astype(str)

In [22]:
# Look up staging detail (product, balance, officer, status, ...) for the XAA-only accounts.
# The lookup has to use the unfiltered `full_data`: `merged_data2` holds only accounts
# that are absent from the filtered `data`, so an inner join against `data` is always empty.
xaa_no_match = full_data.merge(merged_data2, how='inner', on='acctnbr')

In [23]:
xaa_no_match

Unnamed: 0,acctnbr,effdate_x,mjaccttypcd_x,product_x,notebal_x,notemtdavgbal_x,currmiaccttypcd_x,acctofficer_x,ownersortname_x,curracctstatcd_x,...,Latest_Month_Analyzed_Charges,Latest_Month_Combined_Result,Trailing_12M_Analyzed_Charges,Trailing_12M_Combined_Result,Latest_Month_ECR,Trailing_12M_Avg_ECR,Primary_Officer_Name_XAA,Secondary_Officer_Name_XAA,Treasury_Officer_Name_XAA,_merge
0,61059153,2025-09-09,CK,Municipal Money Market,54652.42,54652.42,CK18,STACIE A. LONG,TOWN OF ROCHESTER,DORM,...,0.0,0.0,100.0,100.0,0.0,0.0,JOHN G. DUGGAN,,STEPHEN SHERMAN,right_only
1,27067943,2025-09-09,CK,Municipal Money Market,431040.99,397461.82,CK18,JOHN G. DUGGAN,BRIDGEWATER-RAYNHAM REGIONAL SCHOOL DIST,ACT,...,0.0,0.0,350.0,350.0,0.0,0.0,JOHN G. DUGGAN,,,right_only
2,27071843,2025-09-09,CK,Money Market Statement,42.06,42.06,CK08,FRANK P. WILHELM,ESTATE OF LOUIS AMATO,DORM,...,0.0,0.0,0.0,0.0,0.0,0.0,DIANA M. ROCHEFORT,,STEPHEN SHERMAN,right_only
3,60077352,2025-09-09,CK,Muni Money Market,573299.85,504563.31,CK21,STACIE A. LONG,COUNTY OF BRISTOL,ACT,...,0.0,0.0,100.0,100.0,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
4,61050202,2025-09-09,CK,Municipal Money Market,105994.14,105994.14,CK18,JOHN G. DUGGAN,TOWN OF MEDFIELD,ACT,...,0.0,0.0,350.0,350.0,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,60102527,2025-09-09,CK,Municipal Money Market,39179.63,39179.63,CK18,STACIE A. LONG,TOWN OF NORTH ATTLEBORO,ACT,...,0.0,0.0,100.0,100.0,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
77,63174723,2025-09-09,CK,Municipal Money Market,1073.62,1073.62,CK18,JOHN G. DUGGAN,TOWN OF WALPOLE,ACT,...,0.0,0.0,350.0,350.0,0.0,0.0,JOHN G. DUGGAN,,STEPHEN SHERMAN,right_only
78,60104155,2025-09-09,CK,Municipal Money Market,3671.09,3671.09,CK18,STACIE A. LONG,TOWN OF NORTH ATTLEBORO,ACT,...,0.0,0.0,100.0,100.0,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
79,61059226,2025-09-09,CK,Municipal Money Market,858090.22,858090.22,CK18,PHALLINE SVAY,TOWN OF REHOBOTH,ACT,...,0.0,0.0,100.0,100.0,0.0,0.0,STACIE A. LONG,,STEPHEN SHERMAN,right_only
