In [1]:
# %%
## Memorializing this as the easy way to look up accounts


# %%
"""
Main Entry Point
"""
from pathlib import Path
from typing import List
import argparse
from datetime import datetime

import pandas as pd # type: ignore

import src.additional_fields
import src.fetch_data # type: ignore
import src.core_transform # type: ignore
import cdutils.pkey_sqlite # type: ignore
import cdutils.hhnbr # type: ignore
import cdutils.loans.calculations
import cdutils.inactive_date
import cdutils.input_cleansing

# Current (doesn't really work without)
# data = src.fetch_data.fetch_data()


# # Specific date
# specified_date = datetime(2020, 12, 31)

def query_df_on_date(specified_date):
    """
    Build the account-level dataframe as of ``specified_date``.

    Steps, in order:
    1. Fetch the source tables with ``src.fetch_data.fetch_data`` and run
       them through ``src.core_transform.main_pipeline``.
    2. Append key columns via ``cdutils.pkey_sqlite`` (``add_pkey``,
       ``add_ownership_key``, ``add_address_key``) and the household number
       via ``cdutils.hhnbr.add_hh_nbr`` using the ``househldacct`` table.
    3. Left-join the loan ``Category`` produced by
       ``cdutils.loans.calculations.categorize_loans`` on ``acctnbr``.
    4. Append the inactive date via ``cdutils.inactive_date``.
    5. Left-join the ``wh_acctcommon`` table returned by
       ``src.additional_fields.fetch_data`` on ``acctnbr`` (after casting
       its ``acctnbr`` to ``str`` so the join keys agree).

    Parameters:
    -----------
    specified_date
        Forwarded unchanged to both ``fetch_data`` calls; the notebook
        passes ``datetime`` objects.

    Returns:
    --------
    pd.DataFrame
        The merged account dataframe.
    """
    data = src.fetch_data.fetch_data(specified_date)

    # Core transformation pipeline
    raw_data = src.core_transform.main_pipeline(data)

    # Raw data with keys appended
    raw_data = cdutils.pkey_sqlite.add_pkey(raw_data)
    raw_data = cdutils.pkey_sqlite.add_ownership_key(raw_data)
    raw_data = cdutils.pkey_sqlite.add_address_key(raw_data)

    # Household number
    househldacct = data['househldacct'].copy()
    raw_data = cdutils.hhnbr.add_hh_nbr(raw_data, househldacct)

    # Loan category: only the join key and the category column are kept so
    # the merge cannot introduce duplicate/suffixed columns.
    loan_category_df = cdutils.loans.calculations.categorize_loans(raw_data)
    loan_category_df = loan_category_df[['acctnbr','Category']].copy()
    df = pd.merge(raw_data, loan_category_df, on='acctnbr', how='left')

    df = cdutils.inactive_date.append_inactive_date(df)

    # Additional account-level fields for the same date
    additional_fields = src.additional_fields.fetch_data(specified_date)
    additional_fields_to_append = additional_fields['wh_acctcommon'].copy()

    additional_fields_to_append_schema = {
        'acctnbr':'str'
    }
    additional_fields_to_append = cdutils.input_cleansing.enforce_schema(
        additional_fields_to_append, additional_fields_to_append_schema
    )

    df = pd.merge(df, additional_fields_to_append, how='left', on='acctnbr')

    return df

# %%
# OUTPUT_PATH = Path('acct_table.csv')
# df.to_csv(OUTPUT_PATH, index=False)

# %%
# import hashlib

# %%
# def mask_pii(data, columns_to_mask, length=10):
#     """
#     Create a masking layer

#     Pass in a dataframe to blackbox abstraction and get a dataframe returned with masked PII in specified fields

#     Parameters:
#     - data: raw data
#     - columns_to_mask: list of columns
#     - length: length of hash (10+ is recommended based on size of the data)
#     """
#     df_hashed = data.copy()

#     for col in columns_to_mask:
#         if col in df_hashed.columns:
#             df_hashed[col] = df_hashed[col].astype(str).apply(
#                 lambda x: hashlib.sha256(x.encode('utf-8')).hexdigest()[:length]
#             )
#         else:
#             raise ValueError(f"Column {col} not found in dataframe passed in")
        
#     return df_hashed

# %%
# columns_to_mask = [
#     'acctnbr',
#     'ownersortname',
#     'loanofficer',
#     'acctofficer',
#     'taxrptfororgnbr',
#     'taxrptforpersnbr'
#     ]

# masked_df = mask_pii(df, columns_to_mask)

# %%
# masked_df

# %%
# transposed_df = masked_df.head(3).T.reset_index()

# %%
# transposed_df

# %%
# OUTPUT_PATH = Path('masked_acct_table.csv')
# masked_df.to_csv(OUTPUT_PATH, index=False)

# %%





In [2]:
"""
Data cleaning main pipeline for Accubranch project.

This module implements the high-level data pipeline workflow:
1. Generate/load the acct_df (account data)
2. Merge with organization data and exclude Municipal & Fiduciary accounts
3. Merge with person data to get DOB (join on taxrptforpersnbr)
4. Append address data through orgaddruse/persaddruse tables to get address details

The result is a comprehensive dataframe that serves as the backbone for:
- Account Section reporting (as of any date)
- Annual deposit analysis

Data sources are modular and can be easily swapped between mock data and production database.
"""

import pandas as pd
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any

from src.join_functions import (
    join_accounts_with_orgs,
    join_accounts_with_persons,
    join_accounts_with_addresses
)
import src.acct_data_gathering


def load_mock_data() -> Dict[str, pd.DataFrame]:
    """
    Read the mock CSV extracts into dataframes.

    Each table ``<name>`` is read from ``assets/mock_data/<name>.csv``
    (relative to the current working directory) with default
    ``pd.read_csv`` settings.

    Returns:
    --------
    dict
        Table name -> dataframe for 'wh_org', 'wh_pers', 'wh_addr',
        'orgaddruse' and 'persaddruse'
    """
    mock_dir = Path("assets/mock_data")
    table_names = ('wh_org', 'wh_pers', 'wh_addr', 'orgaddruse', 'persaddruse')

    return {
        table: pd.read_csv(mock_dir / f"{table}.csv")
        for table in table_names
    }


def load_production_data() -> Dict[str, pd.DataFrame]:
    """
    Fetch the supporting tables from the production source.

    Thin wrapper: whatever ``src.acct_data_gathering.fetch_data()`` returns
    is handed back unchanged.

    Returns:
    --------
    dict
        Dictionary containing all required dataframes for the pipeline
    """
    return src.acct_data_gathering.fetch_data()
    
def _join_addresses_for_group(accounts, data_tables, addruse_key, address_use_type, label):
    """Join one customer group to its addresses and print how many rows matched."""
    if len(accounts) == 0:
        # Nothing to join; the caller drops empty frames before concatenating.
        return pd.DataFrame()

    with_addr = join_accounts_with_addresses(
        accounts,
        data_tables[addruse_key],
        data_tables['wh_addr'],
        address_use_type=address_use_type
    )
    matched = with_addr['text1'].notna().sum()
    print(f"{label} accounts with addresses: {matched}/{len(accounts)}")
    return with_addr


def run_data_cleaning_pipeline(
    as_of_date: datetime,
    data_source: str = "mock",
    exclude_org_types: Optional[list] = None,
    address_use_type: str = "PRI"
) -> pd.DataFrame:
    """
    Run the complete data cleaning pipeline following the workflow:
    1. Load supporting tables (orgs, persons, addresses) from ``data_source``
    2. Generate account data for the specified date
    3. Merge with organizations (dropping ``exclude_org_types``)
    4. Merge with persons to get DOB
    5. Append address information

    Progress and summary counts are printed to stdout.

    Note: ``data_source`` only selects where the supporting tables come
    from. The account data itself is always produced by
    ``query_df_on_date(as_of_date)``.

    Parameters:
    -----------
    as_of_date : datetime
        The date for which to generate account data
    data_source : str, default "mock"
        Data source to use ("mock" or "production")
    exclude_org_types : list, optional
        Organization types to exclude. Defaults to ['MUNIC', 'FIDUC'].
        NOTE(review): the notebook's production runs pass ['MUNI', 'TRST'];
        confirm the default codes match the codes in the chosen data source.
    address_use_type : str, default "PRI"
        Address use type to filter for (e.g., 'PRI' for primary addresses)

    Returns:
    --------
    pd.DataFrame
        Comprehensive dataframe with account, customer, and address data

    Raises:
    -------
    ValueError
        If ``data_source`` is neither "mock" nor "production"

    Example:
    --------
    >>> from datetime import datetime
    >>> result = run_data_cleaning_pipeline(
    ...     as_of_date=datetime(2024, 12, 31),
    ...     data_source="mock",
    ...     exclude_org_types=['MUNIC', 'FIDUC']
    ... )
    >>> print(f"Final dataset: {len(result)} accounts with {len(result.columns)} columns")
    """

    # Set default exclusions for Municipal & Fiduciary accounts
    if exclude_org_types is None:
        exclude_org_types = ['MUNIC', 'FIDUC']  # Municipal & Fiduciary Accounts

    print(f"Starting data cleaning pipeline for {as_of_date.date()}")
    print(f"Using data source: {data_source}")
    print(f"Excluding organization types: {exclude_org_types}")

    # Step 1: Load supporting data based on source
    print("\n=== Step 1: Loading supporting data ===")
    if data_source == "mock":
        data_tables = load_mock_data()
        print("Loaded mock data from CSV files")
    elif data_source == "production":
        data_tables = load_production_data()
        print("Loaded production data from database")
    else:
        raise ValueError(f"Unknown data source: {data_source}. Use 'mock' or 'production'")

    # Step 2: Generate account data for the specified date
    print(f"\n=== Step 2: Generating account data for {as_of_date.date()} ===")
    acct_df = query_df_on_date(as_of_date)
    print(f"Generated {len(acct_df)} account records")
    print(f"Account types: {acct_df['mjaccttypcd'].value_counts().to_dict()}")

    # Step 3: Merge with organizations and exclude specified types
    print(f"\n=== Step 3: Merging with organizations (excluding {exclude_org_types}) ===")
    org_merged_df = join_accounts_with_orgs(
        acct_df,
        data_tables['wh_org'],
        exclude_org_types=exclude_org_types
    )
    print(f"After organization merge and filtering: {len(org_merged_df)} accounts")

    # Step 4: Merge with persons to get DOB (join on taxrptforpersnbr)
    print("\n=== Step 4: Merging with persons to append DOB ===")
    pers_merged_df = join_accounts_with_persons(org_merged_df, data_tables['wh_pers'])
    accounts_with_dob = pers_merged_df['datebirth'].notna().sum()
    print(f"After person merge: {len(pers_merged_df)} accounts ({accounts_with_dob} with person data)")

    # Step 5: Append address information
    print("\n=== Step 5: Appending address information ===")

    # Split accounts by customer type for address joins. The three groups are
    # mutually exclusive: an organization link takes precedence, so an account
    # carrying both an org and a person number is not emitted twice by the
    # concat below.
    has_org = pers_merged_df['taxrptfororgnbr'].notna()
    has_pers = pers_merged_df['taxrptforpersnbr'].notna()
    org_accounts = pers_merged_df[has_org]
    person_accounts = pers_merged_df[~has_org & has_pers]
    # Explicit copy: address columns are assigned onto this frame below, and
    # writing to a bare slice triggers SettingWithCopyWarning.
    other_accounts = pers_merged_df[~has_org & ~has_pers].copy()

    print(f"Organization accounts: {len(org_accounts)}")
    print(f"Person accounts: {len(person_accounts)}")
    print(f"Other accounts (no customer link): {len(other_accounts)}")

    org_with_addr = _join_addresses_for_group(
        org_accounts, data_tables, 'orgaddruse', address_use_type, "Organization"
    )
    pers_with_addr = _join_addresses_for_group(
        person_accounts, data_tables, 'persaddruse', address_use_type, "Person"
    )

    # Combine all results, skipping empty groups
    dataframes_to_combine = [
        frame for frame in (org_with_addr, pers_with_addr) if len(frame) > 0
    ]
    if len(other_accounts) > 0:
        # Add empty address columns to other_accounts to match schema
        address_cols = ['text1', 'text2', 'text3', 'cityname', 'statecd', 'zipcd', 'addrnbr']
        for col in address_cols:
            if col not in other_accounts.columns:
                other_accounts[col] = pd.NA
        dataframes_to_combine.append(other_accounts)

    if dataframes_to_combine:
        final_df = pd.concat(dataframes_to_combine, ignore_index=True)
    else:
        final_df = pers_merged_df  # Fallback if no addresses found

    # Step 6: Final summary
    print("\n=== Pipeline Complete ===")
    total_accounts = len(final_df)
    print(f"Final dataset: {total_accounts} accounts with {len(final_df.columns)} columns")
    # The fallback frame may have no address columns, and an empty result
    # would otherwise divide by zero.
    if 'text1' in final_df.columns:
        total_with_addresses = final_df['text1'].notna().sum()
    else:
        total_with_addresses = 0
    pct_with_addresses = total_with_addresses / total_accounts * 100 if total_accounts else 0.0
    print(f"Accounts with address data: {total_with_addresses}/{total_accounts} ({pct_with_addresses:.1f}%)")

    # Summary by account type
    print("\nAccount type summary:")
    for acct_type, count in final_df['mjaccttypcd'].value_counts().items():
        print(f"  {acct_type}: {count}")

    return final_df

In [3]:
# Load the supporting production tables (dict of dataframes) for ad-hoc inspection.
data = load_production_data()

In [6]:
data.keys()

dict_keys(['wh_addr', 'orgaddruse', 'persaddruse', 'wh_org', 'wh_pers'])

In [4]:
# Work on a copy so that changes to `org` do not touch the loaded table.
org = data['wh_org'].copy()

In [5]:
org.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17096 entries, 0 to 17095
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   orgnbr         17096 non-null  int64         
 1   orgname        17096 non-null  object        
 2   orgtypcd       16921 non-null  object        
 3   orgtypcddesc   16921 non-null  object        
 4   taxid          13358 non-null  object        
 5   taxidtypcd     13509 non-null  object        
 6   rpt1099intyn   17096 non-null  object        
 7   privacyyn      17096 non-null  object        
 8   taxexemptyn    17096 non-null  object        
 9   cipratingcd    67 non-null     object        
 10  creditscore    1644 non-null   float64       
 11  siccd          331 non-null    object        
 12  siccddesc      331 non-null    object        
 13  sicsubcd       311 non-null    object        
 14  sicsubcddesc   308 non-null    object        
 15  naicscd        1140

In [7]:
org

Unnamed: 0,orgnbr,orgname,orgtypcd,orgtypcddesc,taxid,taxidtypcd,rpt1099intyn,privacyyn,taxexemptyn,cipratingcd,...,sicsubcd,sicsubcddesc,naicscd,naicscddesc,adddate,datelastmaint,rundate,allowpromoyn,homeemail,busemail
0,1003030,CORNELL MILL MASTER TENANT LLC,LLC,Limited Liability Corporation,D659FFA35580DFB3D1899D0595A33C2B,FEIN,Y,N,N,,...,,,531311,Residential Property Managers,2016-08-08,2025-07-15 21:27:18,2025-07-15,N,cstarr@starrdev.com,
1,1003031,KOSTKAS TAUNTON HARDWARE & SUPPLY CO INC,CORP,Corporation/Business,E3E23BF51CA86F99980ECCCDA46AF261,FEIN,Y,N,N,,...,,,444130,Hardware Stores,1993-10-01,2025-07-15 21:27:18,2025-07-15,N,,
2,1003032,AAFR INC,CORP,Corporation/Business,E70F9A4AC91DAB6C466E922309E61BF7,FEIN,Y,N,N,,...,,,813990,"Other Similar Organizations (except Business, ...",2017-01-20,2025-07-15 21:27:18,2025-07-15,N,academicarestaurant@gmail.com,afern1961@verizon.net
3,1003033,161 POPES ISLAND LLC,LLC,Limited Liability Corporation,C477A5C4B1CEFE125DA660E6EF62B149,FEIN,Y,N,N,,...,,,813990,"Other Similar Organizations (except Business, ...",2017-06-01,2025-07-15 21:27:18,2025-07-15,N,,
4,1003034,PETER TETREAULT INC,CORP,Corporation/Business,ED849F76A707CFB27C30FE65BB8423B5,FEIN,Y,N,N,,...,,,,,1994-04-29,2025-07-15 21:27:18,2025-07-15,N,mirimichi@comcast.net,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17091,1013981,CALVERT FAMILY TRUST,TRST,Trust/Fiduciary,,,Y,N,N,,...,,,525920,"Trusts, Estates, and Agency Accounts",2022-04-08,2025-07-15 21:27:19,2025-07-15,N,,calvert.karene@gmail.com
17092,1013996,JOHNSON TRANSPORTATION LLC,LLC,Limited Liability Corporation,90AEFF56BD47D77EC5D22796A2203FA9,FEIN,Y,N,N,,...,4212,4212-Local Trucking Without Storage,484110,"General Freight Trucking, Local",2022-04-14,2025-07-15 21:27:19,2025-07-15,N,nickjohnson7647@gmail.com,nickjohnson7647@gmail.com
17093,1014008,COMMERCIAL CARPET CLEANING,DBA,Doing Business As,EAA9EF160AAE4AEC77E80FDA017B2DD8,FEIN,Y,N,N,,...,,,811490,Other Personal and Household Goods Repair and ...,2022-04-15,2025-07-15 21:27:19,2025-07-15,N,,etruscanbeast822@gmail.com
17094,1014035,FRANKLIN ANALYTICAL SERVICES INC,CORP,Corporation/Business,4705F64CD0336DA1DFFFC97359C5498D,FEIN,Y,N,N,,...,,,238990,All Other Specialty Trade Contractors,2022-04-20,2025-07-15 21:27:19,2025-07-15,N,,afranklin@franklinanalytical.net


In [3]:
def _production_snapshot(as_of_date):
    """Run the pipeline on production data, excluding MUNI and TRST organizations."""
    return run_data_cleaning_pipeline(
        as_of_date=as_of_date,
        data_source="production",
        exclude_org_types=["MUNI","TRST"]
    )


# One snapshot per year. 2022 and 2023 are pulled as of Dec 30 / Dec 29
# rather than Dec 31 - presumably the last date with data; TODO confirm.
data2020 = _production_snapshot(datetime(2020, 12, 31))
data2021 = _production_snapshot(datetime(2021, 12, 31))
data2022 = _production_snapshot(datetime(2022, 12, 30))
data2023 = _production_snapshot(datetime(2023, 12, 29))
data2024 = _production_snapshot(datetime(2024, 12, 31))

Starting data cleaning pipeline for 2020-12-31
Using data source: production
Excluding organization types: ['MUNI', 'TRST']

=== Step 1: Loading supporting data ===
Loaded production data from database

=== Step 2: Generating account data for 2020-12-31 ===
Generated 91019 account records
Account types: {'CK': 34663, 'CNS': 20004, 'SAV': 17560, 'TD': 7194, 'MTG': 3815, 'CML': 3783, 'RTMT': 2018, 'LEAS': 1964, 'BKCK': 12, 'MLN': 6}

=== Step 3: Merging with organizations (excluding ['MUNI', 'TRST']) ===
Excluded 1455 records with organization types: ['MUNI', 'TRST']
After organization merge and filtering: 89564 accounts

=== Step 4: Merging with persons to append DOB ===
After person merge: 89564 accounts (78217 with person data)

=== Step 5: Appending address information ===
Organization accounts: 11134
Person accounts: 78430
Other accounts (no customer link): 0
Organization accounts with addresses: 11129/11134
Person accounts with addresses: 78424/78430

=== Pipeline Complete ===
Fina

In [4]:
import src.annual_deposit_history


# Yearly snapshots in chronological order; `dates` supplies the label used for
# each snapshot in the resulting table (one column per label).
# NOTE(review): the 2022 and 2023 snapshots were pulled as of 2022-12-30 and
# 2023-12-29 but are labelled '2022-12-31' / '2023-12-31' here - confirm the
# nominal year-end labels are intended.
dataframes = [data2020, data2021, data2022, data2023, data2024]
dates = ['2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31','2024-12-31']
five_yr_history = src.annual_deposit_history.create_time_series_analysis(dataframes, dates)

Creating time series analysis for 5 periods
Processing period 1/5: 2020-12-31
  - Found 32 branches
Processing period 2/5: 2021-12-31
  - Found 31 branches
Processing period 3/5: 2022-12-31
  - Found 30 branches
Processing period 4/5: 2023-12-31
  - Found 30 branches
Processing period 5/5: 2024-12-31
  - Found 28 branches

Time series analysis complete:
- 32 branches analyzed
- 5 time periods


In [5]:
five_yr_history

Unnamed: 0_level_0,2020-12-31,2021-12-31,2022-12-31,2023-12-31,2024-12-31
branchname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BCSB - MAIN OFFICE,275407400.0,267138700.0,250299400.0,252937800.0,285849500.0
BCSB - ATTLEBORO BRANCH,225101500.0,246703600.0,220122700.0,206359000.0,213551400.0
BCSB - NO ATTLEBORO BRANCH,182169300.0,236417400.0,188948000.0,170329200.0,186892700.0
BCSB - RAYNHAM CENTER BRANCH,163255500.0,171061700.0,189411100.0,178326000.0,185669200.0
BCSB - REHOBOTH BRANCH,133261900.0,155421300.0,161953900.0,177610600.0,173706100.0
BCSB - PAWTUCKET BRANCH,125962700.0,138897500.0,149246500.0,124341800.0,140703300.0
BCSB - COUNTY STREET BRANCH,90802470.0,102212200.0,104040200.0,116612700.0,127018400.0
BCSB - DARTMOUTH BRANCH,103170300.0,112941100.0,109669100.0,110904300.0,125736000.0
BCSB - NORTH RAYNHAM BRANCH,119881700.0,124420100.0,123409500.0,114025100.0,114215800.0
BCSB - NB ASHLEY BLVD BRANCH,62403100.0,71644360.0,94961530.0,93844910.0,83077530.0


In [None]:
### This should be the first part, 5 Year History of Branches

In [6]:
# Current-period snapshot (as of 2025-06-30), built from the production source
# with the MUNI and TRST organization types excluded.
data_current = run_data_cleaning_pipeline(
    as_of_date=datetime(2025, 6, 30),
    data_source="production",
    exclude_org_types=["MUNI","TRST"]
)

Starting data cleaning pipeline for 2025-06-30
Using data source: production
Excluding organization types: ['MUNI', 'TRST']

=== Step 1: Loading supporting data ===
Loaded production data from database

=== Step 2: Generating account data for 2025-06-30 ===
Generated 89805 account records
Account types: {'CK': 37205, 'SAV': 16063, 'CNS': 15735, 'TD': 9501, 'MTG': 4457, 'CML': 3390, 'LEAS': 1792, 'RTMT': 1646, 'BKCK': 12, 'MLN': 4}

=== Step 3: Merging with organizations (excluding ['MUNI', 'TRST']) ===
Excluded 1832 records with organization types: ['MUNI', 'TRST']
After organization merge and filtering: 87973 accounts

=== Step 4: Merging with persons to append DOB ===
After person merge: 87973 accounts (77230 with person data)

=== Step 5: Appending address information ===
Organization accounts: 10609
Person accounts: 77364
Other accounts (no customer link): 0
Organization accounts with addresses: 10593/10609
Person accounts with addresses: 77357/77364

=== Pipeline Complete ===
Fina

In [8]:
import numpy as np

In [27]:
# Customer-level key: 'O' + organization number when the account has an
# organization link, otherwise 'P' + person number.
no_org_link = data_current['taxrptfororgnbr'].isnull()
person_key = 'P' + data_current['taxrptforpersnbr'].astype(str)
org_key = 'O' + data_current['taxrptfororgnbr'].astype(str)
data_current['Primary Key'] = np.where(no_org_link, person_key, org_key)

In [28]:
def concat_address(text1, text2, text3):
    """
    Join up to three address lines into one space-separated string.

    Missing values (None, NaN, pd.NA, NaT), other falsy values and
    blank/whitespace-only lines are skipped; each remaining line is
    stripped before joining.

    Returns the joined string, or ``pd.NA`` when no line has content.
    """
    parts = []
    for piece in (text1, text2, text3):
        # pd.isna must come first: NaN is truthy (it would be rendered as the
        # literal 'nan') and bool(pd.NA) raises TypeError.
        if pd.isna(piece) or not piece:
            continue
        cleaned = str(piece).strip()
        if cleaned:
            parts.append(cleaned)
    return ' '.join(parts) if parts else pd.NA

# Single-line street address per account, built from the three address-line
# columns (a column that is absent from the row is passed as None).
data_current['Address'] = data_current.apply(
    lambda acct: concat_address(
        *(acct.get(line_col) for line_col in ('text1', 'text2', 'text3'))
    ),
    axis=1
)

In [29]:
# Keep only the major account types that are reported on.
reported_major_types = ['CML','MLN','CNS','MTG','CK','SAV','TD']
in_scope = data_current['mjaccttypcd'].isin(reported_major_types)
data_current = data_current[in_scope].copy()

In [40]:
# Drop accounts whose current minor account type is CI07.
is_ci07 = data_current['currmiaccttypcd'].isin(['CI07'])
data_current = data_current[~is_ci07].copy()

In [34]:
def map_account_type(acct_code:str):
    """
    Translate a major account type code (mjaccttypcd) into its friendly label.

    The code is converted to a string and upper-cased before lookup; any code
    without a label maps to 'Other'.
    """
    code = str(acct_code).upper()

    # Both commercial codes share one label.
    if code in ('CML', 'MLN'):
        return 'Commercial Loan'

    friendly_names = {
        'CNS': 'Consumer Loan',
        'MTG': 'Residential Loan',
        'CK': 'Checking',
        'SAV': 'Savings',
        'TD': 'CD',
    }
    if code in friendly_names:
        return friendly_names[code]
    return 'Other'

# Friendly account-type label for every row.
major_type_codes = data_current['mjaccttypcd']
data_current['Account Type'] = major_type_codes.map(map_account_type)

In [35]:
# The original loan amount only applies to loan account types; blank it out
# for everything else. Series.where keeps the column numeric (NaN for the
# blanked rows), whereas np.where(..., pd.NA) mixes floats with pd.NA and
# yields an object-dtype column.
is_loan = data_current['mjaccttypcd'].isin(['CML','MLN','MTG','CNS'])
data_current['orig_ttl_loan_amt'] = data_current['orig_ttl_loan_amt'].where(is_loan)

In [36]:
# Accounts without an organization link are treated as individuals.
lacks_org_link = data_current['taxrptfororgnbr'].isnull()
data_current['Business/Individual'] = np.where(lacks_org_link, 'Individual', 'Business')

In [30]:
data_current

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,text2,text3,cityname,statecd,zipcd,orgnbr,persnbr_x,persnbr_y,Primary Key,Address
1,2025-06-30,150936915,REDBROOK APARTMENTS LLC,Commercial Mortgages,65500000.0,VAR,CML,CM40,ACT,0.065722,...,,,BRAINTREE,MA,02184,,,,O1012222,ONE CAMPANELLI DRIVE
2,2025-06-30,151058057,NBPIV SARATOGA LLC,Commercial Mortgages,26000000.0,FIX,CML,CM40,ACT,0.063500,...,,,WAKEFIELD,MA,01880,,,,O1016877,401 EDGEWATER PLACE SUITE 265
3,2025-06-30,60801557,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Investment Business Checking,0.0,VAR,CK,CK28,ACT,0.022000,...,1 WASHINGTON ST,,TAUNTON,MA,02780,,,,O1000047,MILL RIVER PROFESSIONAL CENTER 1 WASHINGTON ST
5,2025-06-30,150862011,"R3 PROJECT COMPANY, LLC",CML Fixed Construction,20000000.0,FIX,CML,CM07,ACT,0.065000,...,,,QUINCY,MA,02169,,,,O1015409,1512 HANCOCK
6,2025-06-30,151038843,"POWER 250, LLC",CML ARM Construction,27500000.0,VAR,CML,CM08,ACT,0.070696,...,,,EAST WALPOLE,MA,02032,,,,O1012385,108 ENDEAN DRIVE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87968,2025-06-30,26266946,"MANGELS, RICHARD A.",eChecking (18 & over),0.0,,CK,CK04,ACT,0.000000,...,,,MIDDLEBORO,MA,02346,,1046949,1046949,P1046949,64 OLD CENTER ROAD APT 3
87969,2025-06-30,151036863,"ZUSCHLAG, JENNIFER L.",Prime Time Checking,40000.0,VAR,CK,CK06,ACT,0.000300,...,,,ATTLEBORO,MA,02703,,1155942,1155942,P1155942,204 PINE ST APT 3
87970,2025-06-30,150635426,"HOWLAND, KERRY A.",Basic Checking,100.0,,CK,CK02,ACT,0.000000,...,,,NEW BEDFORD,MA,02744,,1135101,1135101,P1135101,11 DESAUTELS CT
87971,2025-06-30,150318973,"COLON, DEVANTE L.",eChecking (18 & over),400.0,,CK,CK04,ACT,0.000000,...,,,ATTLEBORO,MA,02703,,1100117,1100117,P1100117,197 ROBINSON AVE


In [37]:
# Source column -> report column name.
report_column_names = {
    'cityname': 'City',
    'statecd': 'State',
    'zipcd': 'Zip',
    'branchname': 'Branch Associated',
    'contractdate': 'Date Account Opened',
    'Net Balance': 'Current Balance',
    'orig_ttl_loan_amt': 'Original Balance (Loans)',
    'datebirth': 'Date of Birth',
}
# rename() already returns a new dataframe, so no extra copy is needed.
data_current = data_current.rename(columns=report_column_names)

In [31]:
data_current.columns.to_list()

['effdate',
 'acctnbr',
 'ownersortname',
 'product',
 'noteopenamt',
 'ratetypcd',
 'mjaccttypcd',
 'currmiaccttypcd',
 'curracctstatcd',
 'noteintrate',
 'bookbalance',
 'notebal',
 'contractdate',
 'datemat',
 'taxrptfororgnbr',
 'taxrptforpersnbr',
 'loanofficer',
 'acctofficer',
 'creditlimitamt',
 'origintrate',
 'marginfixed',
 'fdiccatcd',
 'amortterm',
 'totalpctsold',
 'cobal',
 'credlimitclatresamt',
 'riskratingcd',
 'origdate',
 'currterm',
 'loanidx',
 'rcf',
 'availbalamt',
 'fdiccatdesc',
 'origbal',
 'loanlimityn',
 'Net Balance',
 'Net Available',
 'Net Collateral Reserve',
 'Total Exposure',
 'orig_ttl_loan_amt',
 'portfolio_key',
 'ownership_key',
 'address_key',
 'householdnbr',
 'datelastmaint_x',
 'Category',
 'inactivedate',
 'branchname',
 'primaryownercity',
 'primaryownerstate',
 'orgnbr_x',
 'orgname',
 'orgtypcd',
 'orgtypcddesc',
 'taxid_x',
 'taxidtypcd',
 'rpt1099intyn',
 'privacyyn_x',
 'taxexemptyn',
 'cipratingcd_x',
 'creditscore_x',
 'siccd_x',
 'si

In [41]:
# Columns delivered in the final extract, in output order.
final_columns = [
    'Primary Key',
    'Address',
    'City',
    'State',
    'Zip',
    'Branch Associated',
    'Account Type',
    'Date Account Opened',
    'Current Balance',
    'Original Balance (Loans)',
    'Date of Birth',
]
data_current_final = data_current.loc[:, final_columns].copy()

In [42]:
data_current_final

Unnamed: 0,Primary Key,Address,City,State,Zip,Branch Associated,Account Type,Date Account Opened,Current Balance,Original Balance (Loans),Date of Birth
1,O1012222,ONE CAMPANELLI DRIVE,BRAINTREE,MA,02184,BCSB - COMM'L LENDING- TAUNTON,Commercial Loan,2023-09-12,29000000.00,65500000.0,NaT
2,O1016877,401 EDGEWATER PLACE SUITE 265,WAKEFIELD,MA,01880,BCSB - COMM'L LENDING- TAUNTON,Commercial Loan,2024-06-05,26000000.00,26000000.0,NaT
3,O1000047,MILL RIVER PROFESSIONAL CENTER 1 WASHINGTON ST,TAUNTON,MA,02780,BCSB - MAIN OFFICE,Checking,2012-03-05,22759792.20,,NaT
5,O1015409,1512 HANCOCK,QUINCY,MA,02169,BCSB - COMM'L LENDING- TAUNTON,Commercial Loan,2023-03-07,0.00,20000000.0,NaT
6,O1012385,108 ENDEAN DRIVE,EAST WALPOLE,MA,02032,BCSB - COMM'L LENDING - WARWICK,Commercial Loan,2024-04-22,2254909.06,27500000.0,NaT
...,...,...,...,...,...,...,...,...,...,...,...
87968,P1046949,64 OLD CENTER ROAD APT 3,MIDDLEBORO,MA,02346,BCSB - COUNTY STREET BRANCH,Checking,2016-06-29,-858.81,,1961-01-20
87969,P1155942,204 PINE ST APT 3,ATTLEBORO,MA,02703,BCSB - ATTLEBORO BRANCH,Checking,2024-04-15,-859.40,,1974-10-28
87970,P1135101,11 DESAUTELS CT,NEW BEDFORD,MA,02744,BCSB - NB ASHLEY BLVD BRANCH,Checking,2021-07-26,-887.91,,1971-11-25
87971,P1100117,197 ROBINSON AVE,ATTLEBORO,MA,02703,BCSB - COUNTY STREET BRANCH,Checking,2019-05-13,-1032.68,,1993-12-26


In [12]:
# NOTE(review): `summary` is not assigned by any cell visible in this notebook
# export - it looks like leftover kernel state from a removed cell. TODO:
# restore the cell that builds it so Restart & Run All succeeds.
summary

Unnamed: 0,branchname,total_deposit_balance
12,BCSB - Main Office,275407400.0
0,BCSB - Attleboro Branch,225101500.0
25,BCSB - No Attleboro Branch,182169300.0
28,BCSB - Raynham Center Branch,163255500.0
29,BCSB - Rehoboth Branch,133261900.0
27,BCSB - Pawtucket Branch,125962700.0
26,BCSB - North Raynham Branch,119881700.0
6,BCSB - Dartmouth Branch,103170300.0
4,BCSB - County Street Branch,90802470.0
23,BCSB - NB Ashley Blvd Branch,62403100.0


In [13]:
summary['total_deposit_balance'].sum()

np.float64(1802015043.04)

In [None]:
# Account snapshot taken directly from query_df_on_date (no organization,
# person or address joins).
# NOTE(review): this reuses the name `data2020`, which the pipeline cell also
# assigns - whichever cell ran last wins; consider a distinct name.
specified_date = datetime(2020, 12, 31)
data2020 = query_df_on_date(specified_date)
data2020

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91019 entries, 0 to 91018
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   effdate                 91019 non-null  datetime64[ns]
 1   acctnbr                 91019 non-null  object        
 2   ownersortname           91019 non-null  object        
 3   product                 91019 non-null  object        
 4   noteopenamt             91019 non-null  float64       
 5   ratetypcd               72634 non-null  object        
 6   mjaccttypcd             91019 non-null  object        
 7   currmiaccttypcd         91019 non-null  object        
 8   curracctstatcd          91019 non-null  object        
 9   noteintrate             91019 non-null  float64       
 10  bookbalance             91019 non-null  float64       
 11  notebal                 91019 non-null  float64       
 12  contractdate            91019 non-null  dateti

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2020-12-31,150200005,MoneyGram Payment Systems Inc,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.000000,...,41516.0,43261.0,44260.0,187340.0,2020-02-26 23:28:03,,NaT,BCSB - Main Office,Minneapolis,MN
1,2020-12-31,61053112,City Of Taunton,Municipal Money Market,0.0,VAR,CK,CK18,ACT,0.001500,...,4183.0,33668.0,33967.0,97724.0,2020-02-26 23:28:03,,NaT,BCSB - Muni Main Office,Taunton,MA
2,2020-12-31,150544263,REDBROOK APARTMENTS LLC,Commercial Mortgages,42000000.0,VAR,CML,CM40,ACT,0.030000,...,57113.0,59661.0,61147.0,193906.0,2021-12-07 13:23:49,CRE,NaT,BCSB - Comm'l Lending- Taunton,Braintree,MA
3,2020-12-31,60436522,Hodess Construction Corp,Investment Business Checking,0.0,VAR,CK,CK28,ACT,0.002000,...,3315.0,3942.0,4388.0,241524.0,2022-09-08 22:14:08,,NaT,BCSB - No Attleboro Branch,North Attleboro,MA
4,2020-12-31,6252261,Hutchens Holding II LLC,Commercial Swap Mortgage,13405500.0,VAR,CML,CM43,ACT,0.026586,...,3996.0,4209.0,4432.0,230789.0,2024-07-29 12:35:49,CRE,NaT,BCSB - Comm'l Lending - Candleworks,Fall River,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91014,2020-12-31,150291616,"Fisher, Aquinnah C.",eChecking 16/17,100.0,,CK,CK03,ACT,0.000000,...,,,,,NaT,,NaT,BCSB - Walmart Branch,Taunton,MA
91015,2020-12-31,150513086,"Brooks, Cadeem C.",Basic Checking,50.0,,CK,CK02,ACT,0.000000,...,,,,,NaT,,NaT,BCSB - North Raynham Branch,Lowell,MA
91016,2020-12-31,27039064,Bristol County Savings Bank,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.000000,...,1221.0,1352.0,41577.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Main Office,Taunton,MA
91017,2020-12-31,150337783,Bristol County Savings Bank,Business Checking,0.0,,CK,CK12,ACT,0.000000,...,1221.0,1352.0,51094.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Contact Center,Taunton,MA


In [3]:
# Account snapshot taken directly from query_df_on_date (no organization,
# person or address joins).
# NOTE(review): this reuses the name `data2021`, which the pipeline cell also
# assigns - whichever cell ran last wins; consider a distinct name.
specified_date = datetime(2021, 12, 31)
data2021 = query_df_on_date(specified_date)
data2021

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88944 entries, 0 to 88943
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   effdate                 88944 non-null  datetime64[ns]
 1   acctnbr                 88944 non-null  object        
 2   ownersortname           88944 non-null  object        
 3   product                 88944 non-null  object        
 4   noteopenamt             88944 non-null  float64       
 5   ratetypcd               70453 non-null  object        
 6   mjaccttypcd             88944 non-null  object        
 7   currmiaccttypcd         88944 non-null  object        
 8   curracctstatcd          88944 non-null  object        
 9   noteintrate             88944 non-null  float64       
 10  bookbalance             88944 non-null  float64       
 11  notebal                 88944 non-null  float64       
 12  contractdate            88944 non-null  dateti

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2021-12-31,150200005,MoneyGram Payment Systems Inc,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.0000,...,41516.0,43261.0,44260.0,187340.0,2020-02-26 23:28:03,,NaT,BCSB - Main Office,Minneapolis,MN
1,2021-12-31,61053112,City Of Taunton,Municipal Money Market,0.0,VAR,CK,CK18,ACT,0.0010,...,4183.0,33668.0,33967.0,97724.0,2020-02-26 23:28:03,,NaT,BCSB - Muni Main Office,Taunton,MA
2,2021-12-31,60436522,Hodess Construction Corporation,Investment Business Checking,0.0,VAR,CK,CK28,ACT,0.0015,...,3315.0,3942.0,4388.0,241524.0,2022-09-08 22:14:08,,NaT,BCSB - No Attleboro Branch,North Attleboro,MA
3,2021-12-31,4426080386,"Barry, Richard L.",Statement Savings,0.0,VAR,SAV,SV02,ACT,0.0005,...,,,,,NaT,,NaT,BCSB - No Attleboro Branch,ATTLEBORO,MA
4,2021-12-31,60032826,City Of Attleboro,Municipal Now,0.0,VAR,CK,CK27,ACT,0.0003,...,5106.0,35118.0,35620.0,206654.0,2024-04-29 22:10:12,,NaT,BCSB - Muni Attleboro Branch,Attleboro,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88939,2021-12-31,27059932,Willow Tree Poultry Farm Inc,Business Checking,0.0,,CK,CK12,ACT,0.0000,...,33010.0,33626.0,33919.0,191089.0,2020-02-26 23:28:03,,NaT,BCSB - Attleboro Branch,Attleboro,MA
88940,2021-12-31,150569667,Bristol County Savings Bank,Business Checking,0.0,,CK,CK12,ACT,0.0000,...,1221.0,1352.0,62702.0,,NaT,,NaT,BCSB - Deposit Operations,Taunton,MA
88941,2021-12-31,27039064,Bristol County Savings Bank,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.0000,...,1221.0,1352.0,41577.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Main Office,Taunton,MA
88942,2021-12-31,150337783,Bristol County Savings Bank,Business Checking,0.0,,CK,CK12,ACT,0.0000,...,1221.0,1352.0,51094.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - Contact Center,Taunton,MA


In [4]:
# Build the account-level DataFrame as of 2022-12-30 via query_df_on_date
# and display it as the cell output.
# NOTE(review): 2022-12-31 fell on a Saturday, so 12-30 is presumably the
# last business day with data for that year — confirm.
specified_date = datetime(2022, 12, 30)
data2022 = query_df_on_date(specified_date)
data2022

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88835 entries, 0 to 88834
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   effdate                 88835 non-null  datetime64[ns]
 1   acctnbr                 88835 non-null  object        
 2   ownersortname           88835 non-null  object        
 3   product                 88835 non-null  object        
 4   noteopenamt             88835 non-null  float64       
 5   ratetypcd               70564 non-null  object        
 6   mjaccttypcd             88835 non-null  object        
 7   currmiaccttypcd         88835 non-null  object        
 8   curracctstatcd          88835 non-null  object        
 9   noteintrate             88835 non-null  float64       
 10  bookbalance             88835 non-null  float64       
 11  notebal                 88835 non-null  float64       
 12  contractdate            88835 non-null  dateti

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2022-12-30,150200005,MONEYGRAM PAYMENT SYSTEMS INC,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.0000,...,41516.0,43261.0,44260.0,187340.0,2020-02-26 23:28:03,,NaT,BCSB - MAIN OFFICE,MINNEAPOLIS,MN
1,2022-12-30,150833377,"Alice Building, LLC",Commercial Mortgages,37275000.0,FIX,CML,CM40,ACT,0.0575,...,71066.0,74531.0,76376.0,260303.0,2022-12-23 22:25:51,CRE,NaT,BCSB - COMM'L LENDING- TAUNTON,Providence,RI
2,2022-12-30,60032826,CITY OF ATTLEBORO,Municipal Now,0.0,VAR,CK,CK27,ACT,0.0003,...,5106.0,35118.0,35620.0,206654.0,2024-04-29 22:10:12,,NaT,BCSB - MUNI ATTLEBORO BRANCH,ATTLEBORO,MA
3,2022-12-30,150586025,GREATER FALL RIVER V.S.D,Municipal Now,0.0,VAR,CK,CK27,ACT,0.0003,...,40677.0,42158.0,43236.0,185474.0,2021-03-25 22:19:28,,NaT,BCSB - MUNI FALL RIVER BRANCH,FALL RIVER,MA
4,2022-12-30,150809211,29 CENTER STREET LLC,Commercial Mortgages,18000000.0,FIX,CML,CM40,ACT,0.0490,...,3864.0,48607.0,75391.0,229910.0,2022-11-03 14:12:28,CRE,NaT,BCSB - COMM'L LENDING- TAUNTON,Taunton,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88830,2022-12-30,150818907,"BURY, CAROL A.",eChecking (18 & over),10.0,,CK,CK04,ACT,0.0000,...,,,,,NaT,,NaT,BCSB - MAIN OFFICE,East Taunton,MA
88831,2022-12-30,150569667,BRISTOL COUNTY SAVINGS BANK,Business Checking,0.0,,CK,CK12,DORM,0.0000,...,1221.0,1352.0,62702.0,,NaT,,NaT,BCSB - DEPOSIT OPERATIONS,TAUNTON,MA
88832,2022-12-30,27039064,BRISTOL COUNTY SAVINGS BANK,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.0000,...,1221.0,1352.0,41577.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA
88833,2022-12-30,150337783,BRISTOL COUNTY SAVINGS BANK,Business Checking,0.0,,CK,CK12,ACT,0.0000,...,1221.0,1352.0,51094.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - CONTACT CENTER,TAUNTON,MA


In [5]:
# Build the account-level DataFrame as of 2023-12-29 via query_df_on_date
# and display it as the cell output.
# NOTE(review): 2023-12-30/31 fell on a weekend, so 12-29 is presumably the
# last business day with data for that year — confirm.
specified_date = datetime(2023, 12, 29)
data2023 = query_df_on_date(specified_date)
data2023

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90750 entries, 0 to 90749
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   effdate                 90750 non-null  datetime64[ns]
 1   acctnbr                 90750 non-null  object        
 2   ownersortname           90750 non-null  object        
 3   product                 90750 non-null  object        
 4   noteopenamt             90750 non-null  float64       
 5   ratetypcd               72775 non-null  object        
 6   mjaccttypcd             90750 non-null  object        
 7   currmiaccttypcd         90750 non-null  object        
 8   curracctstatcd          90750 non-null  object        
 9   noteintrate             90750 non-null  float64       
 10  bookbalance             90750 non-null  float64       
 11  notebal                 90750 non-null  float64       
 12  contractdate            90750 non-null  dateti

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2023-12-29,150200005,MONEYGRAM PAYMENT SYSTEMS INC,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.000000,...,41516.0,43261.0,44260.0,187340.0,2020-02-26 23:28:03,,NaT,BCSB - MAIN OFFICE,MINNEAPOLIS,MN
1,2023-12-29,150847394,COUNTY OF BRISTOL,Municipal Money Market,0.0,VAR,CK,CK18,ACT,0.037500,...,4205.0,4427.0,4671.0,,NaT,,NaT,BCSB - MUNI MAIN OFFICE,TAUNTON,MA
2,2023-12-29,150887241,CITY OF TAUNTON,ICS Shadow - Muni,0.0,,CK,CK36,ACT,0.000000,...,4183.0,33668.0,33967.0,,NaT,,NaT,BCSB - MUNI MAIN OFFICE,TAUNTON,MA
3,2023-12-29,150936915,REDBROOK APARTMENTS LLC,Commercial Mortgages,65500000.0,VAR,CML,CM40,ACT,0.075959,...,57113.0,59661.0,61147.0,193906.0,2023-09-22 08:01:38,CRE,NaT,BCSB - COMM'L LENDING- TAUNTON,BRAINTREE,MA
4,2023-12-29,150847401,COUNTY OF BRISTOL,15 Month Municipal CD,0.0,FIX,TD,CD85,ACT,0.042500,...,4205.0,4427.0,4671.0,,NaT,,NaT,BCSB - MUNI MAIN OFFICE,TAUNTON,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90745,2023-12-29,150713503,"CAPPO MANAGEMENT LII, LLC",Business Checking,0.0,,CK,CK12,ACT,0.000000,...,65487.0,68575.0,70249.0,241811.0,2022-02-25 22:36:05,,NaT,BCSB - NORTH RAYNHAM BRANCH,BROCKTON,MA
90746,2023-12-29,27047636,BRISTOL COUNTY SAVINGS BANK,Payoff Overages,0.0,,BKCK,BTC2,ACT,0.000000,...,1221.0,1352.0,41645.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA
90747,2023-12-29,27039064,BRISTOL COUNTY SAVINGS BANK,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.000000,...,1221.0,1352.0,41577.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA
90748,2023-12-29,150337783,BRISTOL COUNTY SAVINGS BANK,Business Checking,0.0,,CK,CK12,ACT,0.000000,...,1221.0,1352.0,51094.0,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA


In [6]:
# Build the account-level DataFrame as of 2024-12-31 via query_df_on_date
# and display it as the cell output.
specified_date = datetime(2024, 12, 31)
data2024 = query_df_on_date(specified_date)
data2024

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90812 entries, 0 to 90811
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   effdate                 90812 non-null  datetime64[ns]
 1   acctnbr                 90812 non-null  object        
 2   ownersortname           90812 non-null  object        
 3   product                 90812 non-null  object        
 4   noteopenamt             90812 non-null  float64       
 5   ratetypcd               72843 non-null  object        
 6   mjaccttypcd             90812 non-null  object        
 7   currmiaccttypcd         90812 non-null  object        
 8   curracctstatcd          90812 non-null  object        
 9   noteintrate             90812 non-null  float64       
 10  bookbalance             90812 non-null  float64       
 11  notebal                 90812 non-null  float64       
 12  contractdate            90811 non-null  dateti

Unnamed: 0,effdate,acctnbr,ownersortname,product,noteopenamt,ratetypcd,mjaccttypcd,currmiaccttypcd,curracctstatcd,noteintrate,...,portfolio_key,ownership_key,address_key,householdnbr,datelastmaint,Category,inactivedate,branchname,primaryownercity,primaryownerstate
0,2024-12-31,150200005,MONEYGRAM PAYMENT SYSTEMS INC,Treasurer's Check,0.0,,BKCK,TRCK,ACT,0.000000,...,41516,43261,44260,187340.0,2020-02-26 23:28:03,,NaT,BCSB - MAIN OFFICE,MINNEAPOLIS,MN
1,2024-12-31,150586025,GREATER FALL RIVER V.S.D,Municipal Now,0.0,VAR,CK,CK27,ACT,0.000300,...,40677,42158,43236,185474.0,2021-03-25 22:19:28,,NaT,BCSB - MUNI FALL RIVER BRANCH,FALL RIVER,MA
2,2024-12-31,150936915,REDBROOK APARTMENTS LLC,Commercial Mortgages,65500000.0,VAR,CML,CM40,ACT,0.067809,...,57113,59661,61147,193906.0,2023-09-22 08:01:38,CRE,NaT,BCSB - COMM'L LENDING- TAUNTON,BRAINTREE,MA
3,2024-12-31,150847394,COUNTY OF BRISTOL,Municipal Money Market,0.0,VAR,CK,CK18,ACT,0.035000,...,4205,4427,4671,,NaT,,NaT,BCSB - MUNI MAIN OFFICE,TAUNTON,MA
4,2024-12-31,151058057,NBPIV SARATOGA LLC,Commercial Mortgages,26000000.0,FIX,CML,CM40,ACT,0.063500,...,79216,83546,85431,288455.0,2024-06-11 12:02:04,CRE,NaT,BCSB - COMM'L LENDING- TAUNTON,WAKEFIELD,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90807,2024-12-31,27047237,FIRST AUTOMOTIVE GROUP INC,Business Checking,0.0,,CK,CK12,ACT,0.000000,...,3730,3935,41292,188829.0,2023-10-19 11:11:11,,NaT,BCSB - MAIN OFFICE,NORTH ATTLEBORO,MA
90808,2024-12-31,151116235,FTRF 143 INC,Simple Business Checking,500.0,,CK,CK25,ACT,0.000000,...,81398,85988,87850,296485.0,2024-12-16 20:42:14,,NaT,BCSB - CANDLEWORKS BRANCH,EAST WAREHAM,MA
90809,2024-12-31,27039064,BRISTOL COUNTY SAVINGS BANK,Escrow Checks Processing,0.0,,BKCK,BTC3,ACT,0.000000,...,1221,1352,41577,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA
90810,2024-12-31,27020568,BRISTOL COUNTY SAVINGS BANK,PDO Checks - OLD,0.0,,BKCK,BTC1,ACT,0.000000,...,1221,1352,41437,255924.0,2022-09-27 22:46:27,,NaT,BCSB - MAIN OFFICE,TAUNTON,MA


In [21]:
def get_deposit_totals(df, deposit_types=('CK', 'SAV', 'TD'), balance_col='Net Balance'):
    """
    Sum the balance column over the deposit accounts in an account snapshot.

    Args:
        df: DataFrame with one row per account. Must contain a 'mjaccttypcd'
            column and the column named by ``balance_col``.
        deposit_types: Major account type codes to include. Defaults to
            'CK', 'SAV' and 'TD', the codes that were originally hard-coded.
        balance_col: Name of the column to total. Defaults to 'Net Balance'.

    Returns:
        The sum of ``balance_col`` over rows whose 'mjaccttypcd' is one of
        ``deposit_types`` (0 when no rows match).

    Raises:
        KeyError: If 'mjaccttypcd' or ``balance_col`` is missing from ``df``.
    """
    is_deposit = df['mjaccttypcd'].isin(list(deposit_types))
    # Select rows and column in one .loc call instead of copying the filtered
    # frame, and do not bind the result to `sum`, which shadowed the builtin.
    return df.loc[is_deposit, balance_col].sum()

In [22]:
# Total deposit balance for the 2020 snapshot (see get_deposit_totals).
sum2020 = get_deposit_totals(data2020)

In [None]:
# Show the 2020 total with thousands separators and two decimal places.
f"{sum2020:,.2f}"

'2,283,515,795.77'

In [None]:
# Total deposit balance for the 2021 snapshot (see get_deposit_totals).
sum2021 = get_deposit_totals(data2021)


In [None]:
# Total deposit balances for the 2022, 2023 and 2024 snapshots
# (see get_deposit_totals).
sum2022 = get_deposit_totals(data2022)
sum2023 = get_deposit_totals(data2023)
sum2024 = get_deposit_totals(data2024)


In [27]:
# Print the deposit total for each snapshot year, one "YYYY: amount" line per
# year, with thousands separators and two decimal places. The year/total
# pairs are listed once so the format string is not repeated per year; the
# generator expression keeps `year`/`total` out of the notebook namespace.
print("\n".join(
    f"{year}: {total:,.2f}"
    for year, total in (
        (2020, sum2020),
        (2021, sum2021),
        (2022, sum2022),
        (2023, sum2023),
        (2024, sum2024),
    )
))


2020: 2,283,515,795.77
2021: 2,513,328,664.16
2022: 2,400,121,111.40
2023: 2,451,179,978.56
2024: 2,509,165,954.44


In [10]:
# Load the source tables into `data`; the following cells index it by table name.
# NOTE(review): the visible import cell only imports `src.fetch_data` as a
# module, so the bare name `fetch_data` must be bound elsewhere in the
# notebook — confirm this cell survives Restart & Run All. It is also called
# without a date here, unlike the call inside query_df_on_date.
data = fetch_data()

In [11]:
# Take copies of the 'wh_addr' and 'orgaddruse' tables so that later changes
# to these frames do not modify the objects held in `data`.
wh_addr = data['wh_addr'].copy()
orgaddruse = data['orgaddruse'].copy()

In [12]:
# Take a copy of the 'persaddruse' table so that later changes to this frame
# do not modify the object held in `data`.
persaddruse = data['persaddruse'].copy()

In [13]:
# Inspect wh_addr: column names, non-null counts and dtypes.
wh_addr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376318 entries, 0 to 376317
Data columns (total 57 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   addrnbr           376318 non-null  int64         
 1   linenbr           376318 non-null  int64         
 2   rundate           376318 non-null  datetime64[ns]
 3   ctrycd            376318 non-null  object        
 4   nextlinenbr       376318 non-null  int64         
 5   ctrysubdivcd      138 non-null     object        
 6   ctrymailcd        2473 non-null    object        
 7   statecd           269156 non-null  object        
 8   cityname          376318 non-null  object        
 9   citynamesndx      301400 non-null  object        
 10  zipcd             269133 non-null  object        
 11  zipsuf            205936 non-null  object        
 12  censustrtnbr      5503 non-null    object        
 13  smsanbr           5405 non-null    object        
 14  post

In [16]:
# Display wh_addr as the cell output (pandas truncates the rows and columns shown).
wh_addr

Unnamed: 0,addrnbr,linenbr,rundate,ctrycd,nextlinenbr,ctrysubdivcd,ctrymailcd,statecd,cityname,citynamesndx,...,addrlinetypdesc7,addrlinetypseq7,addrtextsndx7,mailaddryn,mailtypcd,mailtypdesc,addrusecd,addrusedesc,electronicyn,datelastmaint
0,1004857,1,2025-07-15,USA,2,,,MA,NEWBURYPORT,N161,...,,,,Y,,,RES,Residential,N,2025-07-15 21:33:45
1,1004858,1,2025-07-15,USA,2,,,MA,FALL RIVER,F461,...,,,,Y,,,RES,Residential,N,2025-07-15 21:33:45
2,1004859,1,2025-07-15,USA,2,,,MA,WEST HYANNISPORT,W235,...,,,,Y,,,RES,Residential,N,2025-07-15 21:33:45
3,1004860,1,2025-07-15,USA,2,,,MA,MANSFIELD,M521,...,,,,Y,,,RES,Residential,N,2025-07-15 21:33:45
4,1004861,1,2025-07-15,USA,2,,,MA,ROCKLAND,R245,...,,,,Y,,,PRI,Primary,N,2025-07-15 21:33:45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376313,1432363,1,2025-07-15,USA,2,,,,EMAIL,E540,...,,,,Y,,,HEML,Home Email Address,Y,2025-07-15 21:34:37
376314,1432364,1,2025-07-15,USA,2,,,RI,LINCOLN,L524,...,,,,Y,,,,,N,2025-07-15 21:34:37
376315,1432368,1,2025-07-15,USA,2,,,MA,EAST WALPOLE,,...,,,,Y,,,PRI,Primary,N,2025-07-15 21:34:37
376316,1432370,1,2025-07-15,USA,2,,,MA,EAST FREETOWN,,...,,,,Y,,,PRI,Primary,N,2025-07-15 21:34:37


In [14]:
# Inspect persaddruse: column names, non-null counts and dtypes.
persaddruse.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 328697 entries, 0 to 328696
Data columns (total 13 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   persnbr        328697 non-null  int64         
 1   addrusecd      328697 non-null  object        
 2   addrnbr        328697 non-null  int64         
 3   startdate      101 non-null     datetime64[ns]
 4   stopdate       94 non-null      datetime64[ns]
 5   inactivedate   0 non-null       object        
 6   effdate        328697 non-null  datetime64[ns]
 7   occupancydate  0 non-null       object        
 8   startmonthcd   35 non-null      object        
 9   startdaynbr    35 non-null      float64       
 10  stopmonthcd    35 non-null      object        
 11  stopdaynbr     35 non-null      float64       
 12  datelastmaint  328697 non-null  datetime64[ns]
dtypes: datetime64[ns](4), float64(2), int64(2), object(5)
memory usage: 32.6+ MB


In [15]:
# Inspect orgaddruse: column names, non-null counts and dtypes.
orgaddruse.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34065 entries, 0 to 34064
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   orgnbr         34065 non-null  int64         
 1   addrusecd      34065 non-null  object        
 2   addrnbr        34065 non-null  int64         
 3   startdate      6 non-null      datetime64[ns]
 4   stopdate       5 non-null      datetime64[ns]
 5   inactivedate   0 non-null      object        
 6   effdate        34065 non-null  datetime64[ns]
 7   occupancydate  0 non-null      object        
 8   startmonthcd   2 non-null      object        
 9   startdaynbr    2 non-null      float64       
 10  stopmonthcd    2 non-null      object        
 11  stopdaynbr     2 non-null      float64       
 12  datelastmaint  34065 non-null  datetime64[ns]
dtypes: datetime64[ns](4), float64(2), int64(2), object(5)
memory usage: 3.4+ MB
