In [14]:
from pathlib import Path
from typing import List

import pandas as pd

import src.cdutils.database
import src.cdutils.input_cleansing

In [15]:
data_nov23 = src.cdutils.database.fetch_data_nov23()
data_dec24 = src.cdutils.database.fetch_data_dec24()


In [16]:
data_nov23 = data_nov23['acctcommon'].copy()
data_dec24 = data_dec24['acctcommon'].copy()

In [17]:
data_nov23

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,acctofficer,ownersortname,curracctstatcd,contractdate,branchname,noteintrate,primaryownercity,primaryownerstate
0,100953,2023-11-01,TD,IRA >12 Month CmpDly CD,IR99,75373.57,TIFFANY J. CAHILL,"SABATINI, SYLVIA E.",ACT,2002-10-08,BCSB - GREENVILLE,0.0144,GREENVILLE,RI
1,101709,2023-11-01,SAV,Statement Savings,SV02,951.72,SHAQUITA L. WILSON,"PIANKA, CORI L.",ACT,2005-09-09,BCSB - CUMBERLAND,0.0005,GREENVILLE,RI
2,101730,2023-11-01,CK,Simple Business Checking,CK25,245.76,TIFFANY J. CAHILL,EXCITE! DANCE COMPANY,ACT,2004-03-09,BCSB - GREENVILLE,0,Chepachet,RI
3,101746,2023-11-01,CK,eChecking (18 & over),CK04,424.44,TIFFANY J. CAHILL,"TURNER, SHANYN N.",ACT,2004-03-16,BCSB - GREENVILLE,0,SMITHFIELD,RI
4,100170,2023-11-01,CK,Simple Business Checking,CK25,149.48,TIFFANY J. CAHILL,"BERNSTEIN, WILLIAM L.",ACT,2002-02-06,BCSB - GREENVILLE,0,CHEPACHET,RI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60422,150965857,2023-11-01,CK,Prime Time Checking,CK06,11288.5,Justin A. Jeffrey,"Johnson, Andrea K.",ACT,2023-10-27,BCSB - DARTMOUTH BRANCH,0.0005,South Dartmouth,MA
60423,4409408,2023-11-01,SAV,Statement Savings,SV02,1564.89,JACQUELINE A. THEIS,"GALLOWAY, SARAH M.",ACT,2010-03-10,BCSB - MAIN OFFICE,0.0005,TAUNTON,MA
60424,150781724,2023-11-01,CK,NOW Checking,CK05,123.01,SUSAN M. FARLEY,"STRINGER, JAMES E. II",ACT,2022-08-08,BCSB - NORTH RAYNHAM BRANCH,0.0003,NEW BEDFORD,MA
60425,150781732,2023-11-01,CK,Simple Business Checking,CK25,172.6,Justin A. Jeffrey,CUP OF JO JOS COFFEE LLC,ACT,2022-08-08,BCSB - DARTMOUTH BRANCH,0,South Dartmouth,MA


In [18]:
data_dec24

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,acctofficer,ownersortname,curracctstatcd,contractdate,branchname,noteintrate,primaryownercity,primaryownerstate
0,151022408,2024-12-31,CK,Municipal Now,CK27,2500.59,JOHN G. DUGGAN,TOWN OF WESTBOROUGH,ACT,2024-03-19,BCSB - MUNI MAIN OFFICE,0.0003,WESTBOROUGH,MA
1,4465022727,2024-12-31,SAV,Statement Savings,SV02,5.4,SANDRA J. VANDETTE,"DONLEY, LUCILLE",ACT,2013-05-13,BCSB - NO ATTLEBORO BRANCH,0.0005,NORTH ATTLEBORO,MA
2,150871096,2024-12-31,CK,Prime Time Checking,CK06,9642.13,SANDRA J. VANDETTE,"JOHNSON, JEANNE M.",ACT,2023-03-16,BCSB - NO ATTLEBORO BRANCH,0.0005,ATTLEBORO,MA
3,4465001622,2024-12-31,SAV,Statement Savings,SV02,1687.47,SANDRA J. VANDETTE,"SANTOES, DAVID J.",ACT,2014-06-30,BCSB - NO ATTLEBORO BRANCH,0.0005,ATTLEBORO FALLS,MA
4,9011706,2024-12-31,TD,5 Year IRA CD,IR20,6299.22,JACQUELINE A. THEIS,"GRISWOLD, CYNTHIA M.",ACT,2014-07-21,BCSB - MAIN OFFICE,0.035,TAUNTON,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62833,150985540,2024-12-31,TD,9 Month Prime Time CD,CD23,39942.9,KAITLYN M. SILVA,"FOSTER, ALBERT JR",ACT,2023-12-19,BCSB - EAST FREETOWN BRANCH,0.0395,E FREETOWN,MA
62834,150978462,2024-12-31,CK,eChecking (18 & over),CK04,286.22,KAITLYN M. SILVA,"THOMPSON, MERION W.",ACT,2023-11-30,BCSB - EAST FREETOWN BRANCH,0,ROCHESTER,MA
62835,150987380,2024-12-31,TD,1 Year Prime Time CD,CD68,156903.9,KAITLYN M. SILVA,"FOSTER, DONALD A.",ACT,2023-12-27,BCSB - EAST FREETOWN BRANCH,0.04,LAKEVILLE,MA
62836,151055384,2024-12-31,CK,Business Checking,CK12,130.44,KEVIN M. MCCARTHY,NB PARTNERS FUND IV REIT LLC,ACT,2024-05-28,BCSB - MAIN OFFICE,0,WAKEFIELD,MA


In [4]:

schema = {
    'noteintrate': float,
    'bookbalance': float
}


In [5]:
data_nov23 = src.cdutils.input_cleansing.enforce_schema(data_nov23, schema).copy()
data_dec24 = src.cdutils.input_cleansing.enforce_schema(data_dec24, schema).copy()


In [6]:
from typing import Dict

In [7]:
def generate_officer_df(data: pd.DataFrame) -> pd.DataFrame:
    """
    Group officers and sum balance
    """
    df = data.groupby('acctofficer')['bookbalance'].sum().reset_index()
    return df


In [8]:
data_nov23 = generate_officer_df(data_nov23)
data_dec24 = generate_officer_df(data_dec24)

In [9]:
data_nov23

Unnamed: 0,acctofficer,bookbalance
0,ALISSA ERIN HALL,39813350.0
1,AMY M. BRIGGS,145097100.0
2,AN T. LE,35813640.0
3,ANDREW J. OMER,77165640.0
4,ANDREW K. SPRINGER,20435800.0
5,ANGELEBETH H. FARIA,46508840.0
6,Amman Haidri,29824120.0
7,BRIAN MCMAHON,188575.7
8,Brandon Cannata,13585820.0
9,CHRISTINE M. PAREDES,31352880.0


In [10]:
data_dec24

Unnamed: 0,acctofficer,bookbalance
0,ALISSA E. HALL,49180300.0
1,AMMAN A. HAIDRI,17635860.0
2,AMY L. LECUYER,8.0
3,AMY M. BRIGGS,143224200.0
4,AN T. LE,30926570.0
5,ANDREW J. OMER,33698690.0
6,ANDREW RODRIGUES,7120282.0
7,ANGELEBETH H. FARIA,146284.5
8,BRANDON CANNATA,17136320.0
9,BRIAN MCMAHON,181982.4


In [11]:
merged_df = pd.merge(data_nov23, data_dec24, on='acctofficer', how='outer', suffixes=['_nov23','_dec24'])

In [None]:
merged_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69 entries, 0 to 68
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   acctofficer        69 non-null     object 
 1   bookbalance_nov23  53 non-null     float64
 2   bookbalance_dec24  58 non-null     float64
dtypes: float64(2), object(1)
memory usage: 1.7+ KB


In [3]:
acctcommon = data['acctcommon'].copy()


In [4]:
acctcommon

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,acctofficer,ownersortname,curracctstatcd,contractdate,branchname,noteintrate,primaryownercity,primaryownerstate
0,151022408,2024-12-31,CK,Municipal Now,CK27,2500.59,JOHN G. DUGGAN,TOWN OF WESTBOROUGH,ACT,2024-03-19,BCSB - MUNI MAIN OFFICE,0.0003,WESTBOROUGH,MA
1,151045830,2024-12-31,CK,eChecking (18 & over),CK04,363.35,MICHAEL A. HEY,"CARLINE, JILL K.",ACT,2024-05-05,BCSB - ATTLEBORO BRANCH,0,ATTLEBORO,MA
2,151042464,2024-12-31,CK,Money Market Statement,CK08,287.29,JUSTIN A. JEFFREY,"DUARTE, PATRICIA D.",ACT,2024-04-26,BCSB - DARTMOUTH BRANCH,0.0006,DARTMOUTH,MA
3,151041531,2024-12-31,TD,6 Month Business CD,CD17,717404.37,ALISSA E. HALL,THE ARC OF BRISTOL COUNTY INC,ACT,2024-04-25,BCSB - ATTLEBORO BRANCH,0.045,ATTLEBORO,MA
4,151036574,2024-12-31,CK,NOW Checking,CK05,164.62,SANDRA J. VANDETTE,"DONLEY, KEVIN B.",ACT,2024-04-15,BCSB - NO ATTLEBORO BRANCH,0.0003,NORTH ATTLEBORO,MA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8375,150985540,2024-12-31,TD,9 Month Prime Time CD,CD23,39942.9,KAITLYN M. SILVA,"FOSTER, ALBERT JR",ACT,2023-12-19,BCSB - EAST FREETOWN BRANCH,0.0395,E FREETOWN,MA
8376,150978462,2024-12-31,CK,eChecking (18 & over),CK04,286.22,KAITLYN M. SILVA,"THOMPSON, MERION W.",ACT,2023-11-30,BCSB - EAST FREETOWN BRANCH,0,ROCHESTER,MA
8377,150987380,2024-12-31,TD,1 Year Prime Time CD,CD68,156903.9,KAITLYN M. SILVA,"FOSTER, DONALD A.",ACT,2023-12-27,BCSB - EAST FREETOWN BRANCH,0.04,LAKEVILLE,MA
8378,151055384,2024-12-31,CK,Business Checking,CK12,130.44,KEVIN M. MCCARTHY,NB PARTNERS FUND IV REIT LLC,ACT,2024-05-28,BCSB - MAIN OFFICE,0,WAKEFIELD,MA


In [None]:
grouped_minors = acctcommon.groupby('currmiaccttypcd')['product'].first()


In [None]:
grouped_minors

In [None]:
minors = [
    'CK24', # 1st Business Checking
    'CK12', # Business Checking
    'CK25', # Simple Business Checking
    'CK30', # Business Elite Money Market
    'CK19', # Business Money Market
    'CK22', # Business Premium Plus MoneyMkt
    'CK23', # Premium Business Checking
    'CK40', # Community Assoc Reserve
    'CD67', # Commercial Negotiated Rate
    'CD01', # 1 Month Business CD
    'CD07', # 3 Month Business CD
    'CD17', # 6 Month Business CD
    'CD31', # 1 Year Business CD
    'CD35', # 1 Year Business CD
    'CD37', # 18 Month Business CD
    'CD38', # 2 Year Business CD
    'CD50', # 3 Year Business CD
    'CD53', # 4 Year Business CD
    'CD59', # 5 Year Business CD
    'CD76', # 9 Month Business CD
    'CD84', # 15 Month Business CD
    'CD95', # Business <12 Month Simple CD
    'CD96', # Business >12 Month Simple CD
    'CK28', # Investment Business Checking
    'CK33', # Specialty Business Checking
    'CK34', # ICS Shadow - Business - Demand
    'SV06' # Business Select High Yield
]

In [15]:
def filter_to_business_deposits(df: pd.DataFrame, minors: List) -> pd.DataFrame:
    """
    Filter the total deposit account dataset to specific business minors
    """
    df = df[df['currmiaccttypcd'].isin(minors)].copy()
    return df
    

In [16]:
df_filtered = filter_to_business_deposits(acctcommon, minors)

In [21]:
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 944 entries, 3365 to 6542
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   acctnbr            944 non-null    int64         
 1   effdate            944 non-null    datetime64[ns]
 2   mjaccttypcd        944 non-null    object        
 3   product            944 non-null    object        
 4   currmiaccttypcd    944 non-null    object        
 5   bookbalance        944 non-null    object        
 6   acctofficer        940 non-null    object        
 7   ownersortname      944 non-null    object        
 8   curracctstatcd     944 non-null    object        
 9   contractdate       944 non-null    datetime64[ns]
 10  branchname         944 non-null    object        
 11  noteintrate        944 non-null    object        
 12  primaryownercity   944 non-null    object        
 13  primaryownerstate  944 non-null    object        
dtypes: datetime

In [17]:
df_filtered['product'].unique()

array(['6 Month Business CD', 'ICS Shadow - Business - Demand',
       'Simple Business Checking', 'Business Checking',
       '1 Year Business CD', 'Business Elite Money Market',
       '3 Month Business CD', 'Business Money Market',
       '1st Choice Business Checking', 'Business Select High Yield',
       '9 Month Business CD', 'Community Assoc Reserve',
       '5 Year Business CD'], dtype=object)

In [20]:
df_filtered = df_filtered.sort_values(by='contractdate')