In [1]:
import os
import sys
from pathlib import Path

# Navigate to the project root (equivalent to `cd ..` from the notebook's folder).
# `__file__` only exists when this code runs as a script; in a notebook kernel the
# current working directory is used instead.
if '__file__' in globals():
    # resolve() first: a relative __file__ such as "nb.py" has "." as its parent,
    # so .parent.parent would not actually climb a level.
    project_dir = Path(__file__).resolve().parent.parent
else:
    _cwd = Path.cwd()
    # Re-running this cell must not climb one more level each time: once the
    # working directory already contains src/, it is treated as the project root.
    project_dir = _cwd if (_cwd / "src").is_dir() else _cwd.parent
os.chdir(project_dir)

# Add src directory to Python path for imports (only once, at the front)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [2]:
import src.config

In [3]:
import pandas as pd
from deltalake import DeltaTable
from pathlib import Path

In [None]:
# Load the "account" Delta table from the SILVER location into a pandas frame.
# NOTE(review): `df` is reassigned by the dated query in a later cell, so this
# load only takes effect if that cell is skipped.
df = DeltaTable(src.config.SILVER / "account").to_pandas()

In [4]:
import cdutils.acct_file_creation.core
from datetime import datetime

# Specific as-of date passed to the account query
specified_date = datetime(2020, 12, 31)
# Replaces any earlier `df` with the result of query_df_on_date for that date
# (presumably the account snapshot as of specified_date — confirm in cdutils).
df = cdutils.acct_file_creation.core.query_df_on_date(specified_date)

In [5]:
# Display the frame returned by the dated query.
df

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2020-12-31,150200005,MoneyGram Payment Systems Inc,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.000000,...,41513.0,43261.0,44260.0,187340.0,2020-02-26 23:28:03,,NaT,BCSB - Main Office,Minneapolis,MN
1,2020-12-31,61053112,City Of Taunton,Municipal Money Market,0.0,VAR,CK,CK18,ACT,0.001500,...,4184.0,33668.0,33967.0,97724.0,2020-02-26 23:28:03,,NaT,BCSB - Muni Main Office,Taunton,MA
2,2020-12-31,150544263,REDBROOK APARTMENTS LLC,Commercial Mortgages,42000000.0,VAR,CML,CM40,ACT,0.030000,...,57112.0,59661.0,61147.0,193906.0,2021-12-07 13:23:49,CRE,NaT,BCSB - Comm'l Lending- Taunton,Braintree,MA
3,2020-12-31,60436522,Hodess Construction Corp,Investment Business Checking,0.0,VAR,CK,CK28,ACT,0.002000,...,3316.0,3942.0,4388.0,241524.0,2022-09-08 22:14:08,,NaT,BCSB - No Attleboro Branch,North Attleboro,MA
4,2020-12-31,6252261,Hutchens Holding II LLC,Commercial Swap Mortgage,13405500.0,VAR,CML,CM43,ACT,0.026586,...,3997.0,4209.0,4432.0,230789.0,2024-07-29 12:35:49,CRE,NaT,BCSB - Comm'l Lending - Candleworks,Fall River,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91014,2020-12-31,150291616,"Fisher, Aquinnah C.",eChecking 16/17,100.0,,CK,CK03,ACT,0.000000,...,,,,,NaT,,NaT,BCSB - Walmart Branch,Taunton,MA
91015,2020-12-31,150513086,"Brooks, Cadeem C.",Basic Checking,50.0,,CK,CK02,ACT,0.000000,...,,,,,NaT,,NaT,BCSB - North Raynham Branch,Lowell,MA
91016,2020-12-31,27039064,Bristol County Savings Bank,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.000000,...,1222.0,1352.0,41577.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Main Office,Taunton,MA
91017,2020-12-31,150337783,Bristol County Savings Bank,Business Checking,0.0,,CK,CK12,ACT,0.000000,...,1222.0,1352.0,51094.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Contact Center,Taunton,MA


In [6]:
# List the distinct mjaccttypcd codes in df, before the filter applied in the next cell.
df['mjaccttypcd'].unique()

array(['BKCK', 'CK', 'CML', 'TD', 'MTG', 'SAV', 'MLN', 'CNS', 'LEAS',
       'RTMT'], dtype=object)

In [7]:
# Keep only rows whose major account type is one of the listed loan/deposit codes;
# .copy() detaches the result so later column assignments do not touch a view.
df = df.loc[
    df['mjaccttypcd'].isin(['CML', 'MLN', 'CNS', 'MTG', 'CK', 'SAV', 'TD'])
].copy()

In [8]:
# Create Account Type mapping - Easier to understand, based on our major field
def map_account_type(acct_code:str):
    """
    Translate a major account type code into a friendly Account Type label.

    The code is converted to a string and upper-cased before comparison.
    'CML', 'MLN', 'CNS' and 'MTG' give 'Loan'; 'CK', 'SAV' and 'TD' give
    'Deposit'; every other value (including None/NaN) gives 'Other'.
    """
    code = str(acct_code).upper()
    if code in ('CML', 'MLN', 'CNS', 'MTG'):
        return 'Loan'
    if code in ('CK', 'SAV', 'TD'):
        return 'Deposit'
    return 'Other'

# Label each row 'Loan', 'Deposit' or 'Other' from its mjaccttypcd value.
df['Account Type'] = df['mjaccttypcd'].map(map_account_type)

In [9]:
# Load the "property" Delta table from the SILVER location into a pandas frame.
prop = DeltaTable(src.config.SILVER / "property").to_pandas()

In [10]:
# Display the property frame.
prop

Unnamed: 0,propnbr,aprsvalueamt,aprsdate,taxtowncd,taxtown,taxtypecd,taxdisbperiod,taxescrowyn,propaddr1,propaddr2,...,occupancyeffdate,parcelnbr,fixedmarginamt,priorityseqnbr,datereleased,calcmarginpct,rundate,nbrofunits,datelastmaint,load_timestamp_utc
0,37889,,NaT,,,,,,"373 WESTMINSTER ST, UNITS 1 & 2",,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:31,2025-09-15 10:49:31.157092+00:00
1,37888,890000.00,2017-06-05,,,,,,"373 WESTMINSTER STREET, UNITS 1 & 2",,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:31,2025-09-15 10:49:31.157092+00:00
2,37887,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:31,2025-09-15 10:49:31.157092+00:00
3,37886,890000.00,2017-06-05,,,,,,"373 WESTMINSTER STREET, UNITS 1 & 2",,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:31,2025-09-15 10:49:31.157092+00:00
4,37885,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:31,2025-09-15 10:49:31.157092+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85913,54774,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,0.0,2025-09-12 21:43:34,2025-09-15 10:49:31.157092+00:00
85914,66706,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:37,2025-09-15 10:49:31.157092+00:00
85915,53695,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:34,2025-09-15 10:49:31.157092+00:00
85916,51755,,NaT,,,,,,,,...,NaT,,,,NaT,1.0000,2025-09-12,,2025-09-12 21:43:34,2025-09-15 10:49:31.157092+00:00


In [11]:
# Load the "account_property_link" Delta table from the SILVER location.
# NOTE(review): acct_prop_link is not referenced by any other cell visible here.
acct_prop_link = DeltaTable(src.config.SILVER / "account_property_link").to_pandas()

In [12]:
# Load the "address" Delta table from the SILVER location into a pandas frame.
address = DeltaTable(src.config.SILVER / "address").to_pandas()

In [13]:
# Display the address frame.
address

Unnamed: 0,addrnbr,Full_Street_Address,cityname,statecd,zipcd,load_timestamp_utc
0,1012255,11 MOCKINGBIRD DR,DANIELSON,CT,06239,2025-09-15 10:49:29.343255+00:00
1,1012256,55 WATERMAN AVE,WARWICK,RI,02889,2025-09-15 10:49:29.343255+00:00
2,1012257,PO BOX 457,HATHORNE,MA,01937,2025-09-15 10:49:29.343255+00:00
3,1012258,52 ALAN AVE,PORTSMOUTH,RI,02871,2025-09-15 10:49:29.343255+00:00
4,1012259,130 GLENNON ST,NEW BEDFORD,MA,02745,2025-09-15 10:49:29.343255+00:00
...,...,...,...,...,...,...
379561,1439001,,,,,2025-09-15 10:49:29.343255+00:00
379562,1439020,,,,,2025-09-15 10:49:29.343255+00:00
379563,1439058,620 SAMPSONS MILL RD,COTUIT,MA,02635,2025-09-15 10:49:29.343255+00:00
379564,1439059,PO BOX# 1493,MARSTONS MILLS,MA,02648,2025-09-15 10:49:29.343255+00:00


In [14]:
# Print prop's column names, non-null counts and dtypes.
prop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85918 entries, 0 to 85917
Data columns (total 82 columns):
 #   Column               Non-Null Count  Dtype              
---  ------               --------------  -----              
 0   propnbr              85918 non-null  object             
 1   aprsvalueamt         11945 non-null  object             
 2   aprsdate             11945 non-null  datetime64[us]     
 3   taxtowncd            0 non-null      object             
 4   taxtown              0 non-null      object             
 5   taxtypecd            0 non-null      object             
 6   taxdisbperiod        0 non-null      object             
 7   taxescrowyn          0 non-null      object             
 8   propaddr1            12957 non-null  object             
 9   propaddr2            8 non-null      object             
 10  propaddr3            0 non-null      object             
 11  propcity             12958 non-null  object             
 12  propstate         

In [None]:
# List the distinct raw branchname values (presumably to check them against region_map).
df['branchname'].unique()

In [None]:
# Display df again (by this point it carries the 'Account Type' column).
df

In [None]:
# Exact branch-name -> region lookup. Keys are written in upper case; values are one of
# 'Attleboro/Taunton', 'South Coast', 'Rhode Island' or 'Other'.
# NOTE(review): several keys differ only in hyphen spacing or spelling
# ("LENDING- TAUNTON", "MTG- TAUNTON", "CMM'L", "FNB-RI" vs "FNB - RI"); presumably they
# mirror raw branchname variants — confirm against the source data.
region_map = {
    # ——— Attleboro/Taunton ———
    'BCSB - MUNI MAIN OFFICE': 'Attleboro/Taunton',
    'BCSB - MAIN OFFICE': 'Attleboro/Taunton',
    "BCSB - COMM'L LENDING- TAUNTON": 'Attleboro/Taunton',
    'BCSB - MUNI ATTLEBORO BRANCH': 'Attleboro/Taunton',
    'BCSB - DEPOSIT OPERATIONS': 'Attleboro/Taunton',
    'BCSB - NO ATTLEBORO BRANCH': 'Attleboro/Taunton',
    'BRISTOL COUNTY SAVINGS BANK': 'Attleboro/Taunton',
    "BCSB - COMM'L LENDING - ATTLEBORO": 'Attleboro/Taunton',
    'BCSB - BEACON SECURITY CORP': 'Attleboro/Taunton',
    'BCSB - ATTLEBORO BRANCH': 'Attleboro/Taunton',
    'BCSB - MUNI COUNTY STREET BRANCH': 'Attleboro/Taunton',
    'BCSB - REHOBOTH BRANCH': 'Attleboro/Taunton',
    'BCSB - MUNI REHOBOTH BRANCH': 'Attleboro/Taunton',
    'BCSB - MUNI NO ATTLEBORO BRANCH': 'Attleboro/Taunton',
    'BCSB - MUNI RAYNHAM CENTER BRANCH': 'Attleboro/Taunton',
    'BCSB - COUNTY STREET BRANCH': 'Attleboro/Taunton',
    'BCSB - NORTH RAYNHAM BRANCH': 'Attleboro/Taunton',
    'BCSB - RAYNHAM CENTER BRANCH': 'Attleboro/Taunton',
    "BCSB - COMM'L LENDING - FRANKLIN": 'Attleboro/Taunton',
    'BCSB - FRANKLIN BRANCH': 'Attleboro/Taunton',
    'BCSB - CONS INST LENDING- TAUNTON': 'Attleboro/Taunton',
    'BCSB - RESIDENTIAL MTG - ATTLEBORO': 'Attleboro/Taunton',
    'BCSB - RESIDENTIAL MTG- TAUNTON': 'Attleboro/Taunton',
    'BCSB - RESIDENTIAL MTG - FRANKLIN': 'Attleboro/Taunton',
    'BCSB - CONS INST LENDING - ATTLEBORO': 'Attleboro/Taunton',
    'BCSB - SMALL BUSINESS LOAN CENTER': 'Attleboro/Taunton',
    'BCSB - CONTACT CENTER': 'Attleboro/Taunton',
    'BCSB - TAUNTON HIGH SCHOOL': 'Attleboro/Taunton',
    'BCSB - MUNI ATTLEBORO HIGH SCHOOL': 'Attleboro/Taunton',
    'BCSB - ATTLEBORO HIGH SCHOOL': 'Attleboro/Taunton',
    'BCSB - INDIRECT LENDING': 'Attleboro/Taunton',
    'BCSB - WALMART BRANCH': 'Attleboro/Taunton',
    'BCSB - GOVERNMENT FINANCE DEPARTMENT': 'Attleboro/Taunton',

    # ——— South Coast ———
    'BCSB - MUNI FALL RIVER BRANCH': 'South Coast',
    "BCSB - COMM'L LENDING - FALL RIVER": 'South Coast',
    "BCSB - COMM'L LENDING - CANDLEWORKS": 'South Coast',
    "BCSB - COMM'L LENDING - DARTMOUTH": 'South Coast',
    'BCSB - MUNI DARTMOUTH BRANCH': 'South Coast',
    'BCSB - MUNI NB ASHLEY BLVD BRANCH': 'South Coast',
    'BCSB - NB ASHLEY BLVD BRANCH': 'South Coast',
    'BCSB - MUNI CANDLEWORKS BRANCH': 'South Coast',
    'BCSB - MUNI EAST FREETOWN BRANCH': 'South Coast',
    'BCSB - DARTMOUTH BRANCH': 'South Coast',
    'BCSB - EAST FREETOWN BRANCH': 'South Coast',
    'BCSB - FALL RIVER BRANCH': 'South Coast',
    'BCSB - CANDLEWORKS BRANCH': 'South Coast',
    'BCSB - RESIDENTIAL MTG - DARTMOUTH': 'South Coast',
    'BCSB - RESIDENTIAL MTG - FALL RIVER': 'South Coast',
    'BCSB - RESI LENDING - NEW BEDFORD': 'South Coast',
    'BCSB - NB ROCKDALE AVE BRANCH': 'South Coast',

    # ——— Rhode Island ———
    "BCSB - COMM'L LENDING - WARWICK": 'Rhode Island',
    "BCSB - COMM'L LENDING - PROVIDENCE": 'Rhode Island',
    "BCSB - COMM'L LENDING - PAWTUCKET": 'Rhode Island',
    'BCSB - CUMBERLAND': 'Rhode Island',
    'BCSB - PAWTUCKET BRANCH': 'Rhode Island',
    "BCSB - CMM'L LENDING - FNB-RI": 'Rhode Island',
    'BCSB - MUNI PAWTUCKET BRANCH': 'Rhode Island',
    'BCSB - RESIDENTIAL MTG - PAWTUCKET': 'Rhode Island',
    'BCSB - MUNI GREENVILLE': 'Rhode Island',
    'BCSB - GREENVILLE': 'Rhode Island',
    'BCSB - RESI LENDING - WARWICK': 'Rhode Island',
    'BCSB - CONS INST LENDING - PAWTUCKET': 'Rhode Island',
    'BCSB - CONS INST LENDING - FNB-RI': 'Rhode Island',
    'BCSB - MUNI CUMBERLAND': 'Rhode Island',
    'BCSB - RESIDENTIAL MTG - FNB-RI': 'Rhode Island',
    "BCSB - COMM'L LENDING - FNB - RI": 'Rhode Island',
    'BCSB - RESIDENTIAL MTG - FNB - RI': 'Rhode Island',
    'BCSB - CONS INST LENDING - FNB - RI': 'Rhode Island',
    
    
    # ——— Other ———
    'BCSB - RESIDENTIAL MTG - CAPE COD': 'Other',
    # Operational catch-alls (if any are left unmapped in future, they'll fall to 'Other' via the fillna below)
}

In [None]:
# region_map = {
#     # ——— Attleboro/Taunton ———
#     'BCSB - MUNI MAIN OFFICE': 'Attleboro/Taunton',
#     'BCSB - MAIN OFFICE': 'Attleboro/Taunton',
#     "BCSB - COMM'L LENDING- TAUNTON": 'Attleboro/Taunton',
#     'BCSB - MUNI ATTLEBORO BRANCH': 'Attleboro/Taunton',
#     'BCSB - DEPOSIT OPERATIONS': 'Attleboro/Taunton',
#     'BCSB - NO ATTLEBORO BRANCH': 'Attleboro/Taunton',
#     'BRISTOL COUNTY SAVINGS BANK': 'Attleboro/Taunton',
#     "BCSB - COMM'L LENDING - ATTLEBORO": 'Attleboro/Taunton',
#     'BCSB - BEACON SECURITY CORP': 'Attleboro/Taunton',
#     'BCSB - ATTLEBORO BRANCH': 'Attleboro/Taunton',
#     'BCSB - MUNI COUNTY STREET BRANCH': 'Attleboro/Taunton',
#     'BCSB - REHOBOTH BRANCH': 'Attleboro/Taunton',
#     'BCSB - MUNI REHOBOTH BRANCH': 'Attleboro/Taunton',
#     'BCSB - MUNI NO ATTLEBORO BRANCH': 'Attleboro/Taunton',
#     'BCSB - MUNI RAYNHAM CENTER BRANCH': 'Attleboro/Taunton',
#     'BCSB - COUNTY STREET BRANCH': 'Attleboro/Taunton',
#     'BCSB - NORTH RAYNHAM BRANCH': 'Attleboro/Taunton',
#     'BCSB - RAYNHAM CENTER BRANCH': 'Attleboro/Taunton',
#     "BCSB - COMM'L LENDING - FRANKLIN": 'Attleboro/Taunton',
#     'BCSB - FRANKLIN BRANCH': 'Attleboro/Taunton',
#     'BCSB - CONS INST LENDING- TAUNTON': 'Attleboro/Taunton',
#     'BCSB - RESIDENTIAL MTG - ATTLEBORO': 'Attleboro/Taunton',
#     'BCSB - RESIDENTIAL MTG- TAUNTON': 'Attleboro/Taunton',
#     'BCSB - RESIDENTIAL MTG - FRANKLIN': 'Attleboro/Taunton',
#     'BCSB - CONS INST LENDING - ATTLEBORO': 'Attleboro/Taunton',
#     'BCSB - SMALL BUSINESS LOAN CENTER': 'Attleboro/Taunton',
#     'BCSB - CONTACT CENTER': 'Attleboro/Taunton',
#     'BCSB - TAUNTON HIGH SCHOOL': 'Attleboro/Taunton',
#     'BCSB - MUNI ATTLEBORO HIGH SCHOOL': 'Attleboro/Taunton',
#     'BCSB - ATTLEBORO HIGH SCHOOL': 'Attleboro/Taunton',
#     'BCSB - INDIRECT LENDING': 'Attleboro/Taunton',

#     # ——— South Coast ———
#     'BCSB - MUNI FALL RIVER BRANCH': 'South Coast',
#     "BCSB - COMM'L LENDING - FALL RIVER": 'South Coast',
#     "BCSB - COMM'L LENDING - CANDLEWORKS": 'South Coast',
#     "BCSB - COMM'L LENDING - DARTMOUTH": 'South Coast',
#     'BCSB - MUNI DARTMOUTH BRANCH': 'South Coast',
#     'BCSB - MUNI NB ASHLEY BLVD BRANCH': 'South Coast',
#     'BCSB - NB ASHLEY BLVD BRANCH': 'South Coast',
#     'BCSB - MUNI CANDLEWORKS BRANCH': 'South Coast',
#     'BCSB - MUNI EAST FREETOWN BRANCH': 'South Coast',
#     'BCSB - DARTMOUTH BRANCH': 'South Coast',
#     'BCSB - EAST FREETOWN BRANCH': 'South Coast',
#     'BCSB - FALL RIVER BRANCH': 'South Coast',
#     'BCSB - CANDLEWORKS BRANCH': 'South Coast',
#     'BCSB - RESIDENTIAL MTG - DARTMOUTH': 'South Coast',
#     'BCSB - RESIDENTIAL MTG - FALL RIVER': 'South Coast',
#     'BCSB - RESI LENDING - NEW BEDFORD': 'South Coast',

#     # ——— Rhode Island ———
#     "BCSB - COMM'L LENDING - WARWICK": 'Rhode Island',
#     "BCSB - COMM'L LENDING - PROVIDENCE": 'Rhode Island',
#     "BCSB - COMM'L LENDING - PAWTUCKET": 'Rhode Island',
#     'BCSB - CUMBERLAND': 'Rhode Island',
#     'BCSB - PAWTUCKET BRANCH': 'Rhode Island',
#     "BCSB - CMM'L LENDING - FNB-RI": 'Rhode Island',
#     'BCSB - MUNI PAWTUCKET BRANCH': 'Rhode Island',
#     'BCSB - RESIDENTIAL MTG - PAWTUCKET': 'Rhode Island',
#     'BCSB - MUNI GREENVILLE': 'Rhode Island',
#     'BCSB - GREENVILLE': 'Rhode Island',
#     'BCSB - RESI LENDING - WARWICK': 'Rhode Island',
#     'BCSB - CONS INST LENDING - PAWTUCKET': 'Rhode Island',
#     'BCSB - CONS INST LENDING - FNB-RI': 'Rhode Island',
#     'BCSB - MUNI CUMBERLAND': 'Rhode Island',
#     'BCSB - RESIDENTIAL MTG - FNB-RI': 'Rhode Island',

#     # ——— Other ———
#     'BCSB - RESIDENTIAL MTG - CAPE COD': 'Other',
#     # Operational catch-alls (if any are left unmapped in future, they'll fall to 'Other' via the fillna below)
# }

# # Create the Region column from the mapping
# import numpy as np
# df['Region'] = df['branchname'].map(region_map).fillna(
#     np.where(df['branchname'].str.contains(r'Warwick|Providence|Pawtucket|Cumberland|Greenville|FNB-RI', case=False), 'Rhode Island',
#     np.where(df['branchname'].str.contains(r'Fall River|Dartmouth|East Freetown|New Bedford|Candleworks|Ashley Blvd', case=False), 'South Coast',
#     np.where(df['branchname'].str.contains(r'Attleboro|Franklin|Raynham|Taunton|Rehoboth|County Street|Main Office', case=False), 'Attleboro/Taunton', 'Other'))))



In [None]:
# --- Normalize, Map Regions, and Emit "Other" Review --------------------------
import re
import pandas as pd

# 1) Normalize BRANCH names in a new column (keep original for reference)
def normalize_branch(series: pd.Series) -> pd.Series:
    """Return an upper-cased, whitespace- and punctuation-normalized copy of branch names.

    Missing values become empty strings. Apostrophe look-alikes are turned into a
    straight apostrophe, every hyphen is surrounded by exactly one space on each side,
    runs of whitespace are collapsed and the ends are trimmed. Finally the known
    spelling variants of "COMM'L" are unified.
    """
    cleaned = series.fillna("").astype(str).str.upper()

    # Applied in this order: quotes first, then hyphen spacing, then space collapsing.
    regex_steps = (
        (r"[‘’ʼ´`]", "'"),
        (r"\s*-\s*", " - "),
        (r"\s+", " "),
    )
    for pattern, replacement in regex_steps:
        cleaned = cleaned.str.replace(pattern, replacement, regex=True)
    cleaned = cleaned.str.strip()

    # Literal (non-regex) fixes for variants/typos seen historically.
    for variant in ("CMM'L", "COMM’L"):
        cleaned = cleaned.str.replace(variant, "COMM'L", regex=False)

    return cleaned

df["branch_std"] = normalize_branch(df["branchname"])

# 2) Normalize the region-map keys with the SAME function used for branch_std.
#    Upper-casing alone is not enough: normalize_branch also pads every hyphen with
#    spaces and rewrites "CMM'L", so keys such as "BCSB - COMM'L LENDING- TAUNTON" or
#    "... FNB-RI" could never equal a normalized branch name and silently fell through
#    to the regex fallback. Keys that normalize to the same text collapse into one
#    entry (the later one wins).
_region_keys = normalize_branch(pd.Series(list(region_map), dtype="object"))
region_map_upper = dict(zip(_region_keys, region_map.values()))

# 3) Exact match mapping first; branches without an exact match stay NaN here
df["Region"] = df["branch_std"].map(region_map_upper)

# 4) Regex fallback by geography (safety net; exact map above is the source of truth)
_fallback_patterns = [
    (r"\b(WARWICK|PROVIDENCE|PAWTUCKET|CUMBERLAND|GREENVILLE|FNB-RI)\b", "Rhode Island"),
    (r"\b(FALL RIVER|DARTMOUTH|EAST FREETOWN|NEW BEDFORD|CANDLEWORKS|ASHLEY BLVD)\b", "South Coast"),
    (r"\b(ATTLEBORO|FRANKLIN|RAYNHAM|TAUNTON|REHOBOTH|COUNTY STREET|MAIN OFFICE)\b", "Attleboro/Taunton"),
]

def fallback_region(name: str) -> str | None:
    for pat, region in _fallback_patterns:
        if re.search(pat, name):
            return region
    return None

# Rows the exact map left as NaN take the regex fallback result; a name matching no
# fallback pattern becomes "Other".
df["Region"] = df["Region"].fillna(df["branch_std"].apply(lambda x: fallback_region(x) or "Other"))

# 5) Optional: fix Region order so it’s stable across runs
REGION_ORDER = ["Attleboro/Taunton", "South Coast", "Rhode Island", "Other"]
# Ordered categorical: sorting and grouping follow REGION_ORDER instead of alphabetical
# order. Any value not listed in REGION_ORDER would become NaN here.
df["Region"] = pd.Categorical(df["Region"], categories=REGION_ORDER, ordered=True)

# 6) Sum "Net Balance" per (Region, Account Type).
# observed=True keeps only the Region categories that actually occur in the data.
# NOTE(review): assumes df already carries a "Net Balance" column; it is not created
# in the cells visible here — confirm it comes from the source query.
grouped_df = (
    df.groupby(["Region", "Account Type"], observed=True)
      .agg(NetBalance_sum=("Net Balance", "sum"))
      .reset_index()
)

# 7) Always emit a review table for unmapped/“Other”
# Work on a copy of the rows whose Region resolved to "Other".
other_base = df[df["Region"] == "Other"].copy()

# One row per normalized branch, with examples of original values and totals
examples = (
    other_base.groupby("branch_std")["branchname"]
    .apply(lambda x: sorted(set(x))[:3])  # up to 3 example raw names
    .reset_index(name="examples")
)

# Row count and summed "Net Balance" per normalized branch, joined to the example
# names and sorted so the branches with the most rows (then largest balance) come first.
other_df = (
    other_base.groupby("branch_std", as_index=False)
    .agg(
        n_accounts=("branch_std", "size"),
        NetBalance_sum=("Net Balance", "sum"),
    )
    .merge(examples, on="branch_std", how="left")
    .sort_values(["n_accounts", "NetBalance_sum"], ascending=[False, False])
)

# 8) (Nice to have) quick coverage stats for your log
# mapped_rate is the share of rows whose Region is anything other than "Other";
# other_df has one row per branch_std value that ended up in "Other".
mapped_rate = (df["Region"] != "Other").mean()
print(f"Region mapping coverage: {mapped_rate:.1%} of rows; {other_df.shape[0]} unmapped branch_std values.")
# -----------------------------------------------------------------------------



In [None]:
# Display the per-branch review table for rows whose Region is "Other".
other_df

In [None]:
# Sum "Net Balance" per (Region, Account Type).
# observed=True keeps this consistent with the aggregation in the region-mapping cell:
# Region is an ordered categorical, and without it pandas also emits rows for Region
# categories that never occur in the data (and recent versions warn about the default).
grouped_df = (
    df.groupby(['Region', 'Account Type'], observed=True)
      .agg(NetBalance_sum=('Net Balance', 'sum'))
      .reset_index()
)

In [None]:
# Display the Region x Account Type balance totals.
grouped_df