In [1]:
"""
Main Entry Point
"""
from pathlib import Path

import pandas as pd # type: ignore

import cdutils.pkey_sqlite # type: ignore
import cdutils.filtering # type: ignore
import cdutils.input_cleansing # type: ignore
import cdutils.cmo_append # type: ignore
import src.add_fields
import src.core_transform
import src.output_to_excel
from src._version import __version__
import src.output_to_excel_multiple_sheets

# def main(production_flag: bool=False):
#     if production_flag:
#         BASE_PATH = Path(r'\\00-DA1\Home\Share\Line of Business_Shared Services')
#         assert "prod" in __version__, (f"Cannot run in production mode without 'prod' in the __version__")
#     else:
#         BASE_PATH = Path('.')

   
    # # Output to excel (raw data)
    # OUTPUT_PATH = BASE_PATH / Path('./output/concentration_deposits.xlsx')
    # data.to_excel(OUTPUT_PATH, sheet_name='Unformatted', index=False)




In [2]:
# Staging workbook written by the daily deposit update.
# View dev section of documentation for more detail.
INPUT_PATH = Path(r"\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Data_Analytics\Daily_Deposit_Update\Production\output\DailyDeposit_staging.xlsx")

# Read the workbook, then enrich it step by step; each helper takes the
# frame and returns the frame used by the next step.
data = (
    pd.read_excel(INPUT_PATH)
    .pipe(cdutils.pkey_sqlite.add_pkey)      # add portfolio key
    .pipe(src.add_fields.add_noteintrate)    # add int rate
)


# Custom list of minors (Business Deposits), kept as a code -> product name
# mapping. `minors` is the list of codes in the order they are declared here.
business_minor_products = {
    'CK24': '1st Business Checking',
    'CK12': 'Business Checking',
    'CK25': 'Simple Business Checking',
    'CK30': 'Business Elite Money Market',
    'CK19': 'Business Money Market',
    'CK22': 'Business Premium Plus MoneyMkt',
    'CK23': 'Premium Business Checking',
    'CK40': 'Community Assoc Reserve',
    'CD67': 'Commercial Negotiated Rate',
    'CD01': '1 Month Business CD',
    'CD07': '3 Month Business CD',
    'CD17': '6 Month Business CD',
    'CD31': '1 Year Business CD',
    'CD35': '1 Year Business CD',
    'CD37': '18 Month Business CD',
    'CD38': '2 Year Business CD',
    'CD50': '3 Year Business CD',
    'CD53': '4 Year Business CD',
    'CD59': '5 Year Business CD',
    'CD76': '9 Month Business CD',
    'CD84': '15 Month Business CD',
    'CD95': 'Business <12 Month Simple CD',
    'CD96': 'Business >12 Month Simple CD',
    'CK28': 'Investment Business Checking',
    'CK33': 'Specialty Business Checking',
    'CK34': 'ICS Shadow - Business - Demand',
    'SV06': 'Business Select High Yield',
}
minors = list(business_minor_products)

# Keep only the business deposit accounts (by minor code), then append the
# CMO field (presumably the 'Cash Management Officer' column shown in the
# final output -- confirm against cdutils.cmo_append).
data = (
    data
    .pipe(cdutils.filtering.filter_to_business_deposits, minors)
    .pipe(cdutils.cmo_append.append_cmo)
)

# Force the interest rate to float; .copy() gives later cells an
# independent frame to work on.
data_schema = {'noteintrate': float}
data = cdutils.input_cleansing.enforce_schema(data, data_schema).copy()


 

In [18]:
# Exclude BCSB internal accounts: flag owners whose name contains the bank's
# own name (case-insensitive; missing names count as "not internal").
is_internal_owner = data['ownersortname'].str.contains('BRISTOL COUNTY SAVINGS', case=False, na=False)
data = data.loc[~is_internal_owner].copy()

In [19]:
# Locate the single CSV export expected in the assets folder and load it.
ASSETS_PATH = Path('./assets')

# Sub-directories are ignored; only regular files count toward the check.
files = sorted(f for f in ASSETS_PATH.iterdir() if f.is_file())

# Exactly one input file is allowed, so a stale export cannot be picked up by
# accident. The message lists what was found to make the failure actionable.
assert len(files) == 1, (
    f"Expected exactly 1 file in {ASSETS_PATH}, found {len(files)}. "
    f"Files: {[f.name for f in files]}"
)

file = files[0]
# Compare the extension case-insensitively so 'export.CSV' is accepted too.
assert file.suffix.lower() == '.csv', f"Expected a .csv file, got {file.name!r}"

xaa_data = pd.read_csv(file)

In [20]:
# Inspect column names, non-null counts and dtypes of the CSV loaded from the assets folder
xaa_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4492 entries, 0 to 4491
Data columns (total 10 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Account Number                         4492 non-null   int64  
 1   Analysis Account Type                  4492 non-null   int64  
 2   Account Name                           4492 non-null   object 
 3   Cycle End Date                         4492 non-null   object 
 4   Combined Result for Settlement Period  4492 non-null   object 
 5   Analyzed Result Disposition            4492 non-null   object 
 6   Debit Account Number                   4492 non-null   int64  
 7   Primary Officer Name                   3518 non-null   object 
 8   Secondary Officer Name                 0 non-null      float64
 9   Treasury Officer Name                  2160 non-null   object 
dtypes: float64(1), int64(3), object(6)
memory usage: 351.1+ KB


In [21]:
# Strip "$" and "," from the settlement amounts so the column can be cast to
# float. The pattern is a raw string because "\$" is not a valid Python string
# escape and emits a warning in a normal literal; the regex itself is unchanged.
xaa_data['Combined Result for Settlement Period'] = (
    xaa_data['Combined Result for Settlement Period']
    .str.replace(r'[\$,]', '', regex=True)
)

In [22]:
# Target dtypes for the two XAA columns used downstream: the settlement amount
# becomes numeric and the debit account number becomes text.
settlement_column = 'Combined Result for Settlement Period'
debit_account_column = 'Debit Account Number'

xaa_schema = {settlement_column: 'float', debit_account_column: 'str'}
xaa_data = cdutils.input_cleansing.enforce_schema(xaa_data, xaa_schema)


In [23]:
# Collapse the XAA rows to one row per debit account: settlement amounts are
# summed and the first available treasury officer name is kept.
xaa_aggregations = {
    'Combined Result for Settlement Period': 'sum',
    'Treasury Officer Name': 'first',
}
xaa_by_debit_account = xaa_data.groupby('Debit Account Number')
summarized_xaa = xaa_by_debit_account.agg(xaa_aggregations).reset_index()

In [24]:
# Rename the summary columns: the account key becomes 'acctnbr' (the merge key
# used against the deposit data) and the officer column gets an _XAA suffix.
xaa_column_names = {
    'Debit Account Number': 'acctnbr',
    'Combined Result for Settlement Period': 'Total Fees',
    'Treasury Officer Name': 'Treasury Officer Name_XAA',
}
summarized_xaa = summarized_xaa.rename(columns=xaa_column_names).copy()

In [25]:
# Display the per-account fee summary (the rendered output is truncated by pandas)
summarized_xaa

Unnamed: 0,acctnbr,Total Fees,Treasury Officer Name_XAA
0,1000010034,0.0,STEPHEN SHERMAN
1,100038,0.0,
2,100042,0.0,
3,100060,0.0,
4,100102,35.0,
...,...,...,...
4483,63701804,0.0,STEPHEN SHERMAN
4484,9001819,0.0,STEPHEN SHERMAN
4485,919666,0.0,STEPHEN SHERMAN
4486,996290,0.0,STEPHEN SHERMAN


In [26]:
# The left merge that follows must not duplicate deposit rows, so every
# account number may appear at most once in the fee summary.
has_repeated_acctnbr = summarized_xaa['acctnbr'].duplicated().any()
assert not has_repeated_acctnbr, "Duplicates"

In [27]:
# Attach the fee summary to every deposit account; accounts without a match
# are kept (left join) and get missing values in the fee columns.
merged_data = data.merge(summarized_xaa, how='left', on='acctnbr')

In [28]:
# Accounts with no match in the fee summary are reported with zero fees.
merged_data = merged_data.fillna({'Total Fees': 0})

In [29]:
# Inspect column names, non-null counts and dtypes after the fee merge
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6513 entries, 0 to 6512
Data columns (total 25 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   acctnbr                    6513 non-null   object        
 1   effdate                    6513 non-null   datetime64[ns]
 2   mjaccttypcd                6513 non-null   object        
 3   product                    6513 non-null   object        
 4   notebal                    6513 non-null   float64       
 5   notemtdavgbal              6513 non-null   float64       
 6   currmiaccttypcd            6513 non-null   object        
 7   acctofficer                6508 non-null   object        
 8   ownersortname              6513 non-null   object        
 9   curracctstatcd             6513 non-null   object        
 10  contractdate               6512 non-null   datetime64[ns]
 11  ytdavgbal                  6513 non-null   float64       
 12  3Mo_Av

In [30]:
# Core transformation pipeline: turns the merged account-level data into the
# report layout written to the formatted sheet.
formatted_data = merged_data.pipe(src.core_transform.main_pipeline)

In [31]:
# Display the formatted report. In the rendered output each portfolio_key group
# appears to end with a totals row followed by a blank spacer row.
formatted_data

Unnamed: 0,portfolio_key,acctnbr,ownersortname,product,mjaccttypcd,currmiaccttypcd,acctofficer,notebal,noteintrate,curracctstatcd,...,3Mo_AvgBal,TTM_AvgBal,Year Ago Balance,TTM_DAYS_OVERDRAWN,TTM_NSF_COUNT,YTD_DAYS_OVERDRAWN,YTD_NSF_COUNT,Cash Management Officer,Total Fees,Treasury Officer Name_XAA
0,3544,60801557,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Investment Business Checking,CK,CK28,KEVIN M. MCCARTHY,22578083.1,0.022,ACT,...,21016506.363333,15343577.084167,14431024.46,0,0,0,0,,0.0,STEPHEN SHERMAN
1,3544,27063948,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Business Checking,CK,CK12,KEVIN M. MCCARTHY,1000000.0,0.0,ACT,...,1024850.413333,1013580.605,924620.3,1,0,0,0,,0.0,
2,3544,60076593,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",Business Elite Money Market,CK,CK30,KEVIN M. MCCARTHY,815949.86,0.0245,ACT,...,810978.963333,803539.579167,792805.98,0,0,0,0,,0.0,
3,3544,,"COMMUNITY COUNSELING OF BRISTOL COUNTY, INCORP...",,,,,24394032.96,0.0465,,...,22852335.74,17160697.268333,16148450.74,1,0,0,0,,0.0,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13138,,,,,,,,,,,...,,,,,,,,,,
13139,39128,27005739,LANGWAY TOYOTA OF NEWPORT,Business Checking,CK,CK12,JACQUELINE A. THEIS,0.0,0.0,ACT,...,0.0,0.0,0.0,0,0,0,0,,0.0,
13140,39128,27044610,LANGWAY TOYOTA OF NEWPORT,Business Checking,CK,CK12,JACQUELINE A. THEIS,-1350.0,0.0,ACT,...,1458.046667,335.0725,-1087.75,32,0,20,0,,0.0,
13141,39128,,LANGWAY TOYOTA OF NEWPORT,,,,,-1350.0,0.0,,...,1458.046667,335.0725,-1087.75,32,0,20,0,,0.0,


In [32]:
# Output to excel: the raw merged data and the formatted report go to
# separate sheets of the same workbook.
BASE_PATH = Path('.')
OUTPUT_PATH = BASE_PATH / 'output' / 'business_deposits_concentration.xlsx'

# ExcelWriter does not create missing folders, so make sure the output
# directory exists before writing (no-op when it is already there).
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

with pd.ExcelWriter(OUTPUT_PATH, engine="openpyxl") as writer:
    merged_data.to_excel(writer, sheet_name='Unformatted', index=False)
    formatted_data.to_excel(writer, sheet_name='Sheet1', index=False)

# Format excel (applied to the saved workbook in place)
src.output_to_excel_multiple_sheets.format_excel_file(OUTPUT_PATH)

# if __name__ == '__main__':
#     print(f"Starting [{__version__}]")
#     # main(production_flag=True)
#     main()
#     print("Complete!")

