In [17]:
# Report Request - Dave Medieros & Stephanie Nordberg (Business Deposit Accts)
# Developed by CD
# v1.0.4

### Notes:
Dave needs the report filtered to only the following products:
- Simple Business Checking
- 1st Choice Business Checking
- Business Checking
- IOLTA Checking
- Community Checking


In [23]:
import os
from datetime import datetime, timedelta, date
import json
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine
from typing import List
import numpy as np
import time
from collections import defaultdict, Counter
import hashlib
import pandas as pd
import base64
import math
from cryptography.fernet import Fernet
from dotenv import load_dotenv
from io import StringIO
from pathlib import Path
import asyncio
import nest_asyncio
import sys
# retrieve_data() below calls loop.run_until_complete() while an event loop
# is already running; presumably that re-entrant call relies on this patch
# being applied first — TODO confirm.
nest_asyncio.apply()

# On Windows, switch asyncio to the selector event loop policy.
# NOTE(review): the reason for preferring the selector loop is not recorded
# in this notebook — confirm it is still required.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())


def retrieve_data():
    """
    Retrieve data from COCC database

    Builds a DatabaseHandler from the encrypted credentials on the shared
    drive, runs the configured SQL queries concurrently and disposes of the
    engines afterwards.

    Returns:
        dict: Maps each query's 'key' to the pandas DataFrame it produced
            (currently only 'acctcommon').
    """
    class DatabaseHandler:
        """
        This class abstracts the connection to the database and allows a clean
        interface for the developer to use.

        This connector can handle async queries

        """
        def __init__(self, tns_admin_path):
            """
            Args:
                tns_admin_path (str): Oracle driver path, exported as the
                    TNS_ADMIN environment variable.

            Credentials are not passed in: they are read from the encrypted
            .env file, which is decrypted with the Fernet key stored next to it.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load private key
            key_key_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load encrypted data
            encoded_env_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to dotenv from memory so the plaintext
            # credentials are never written to disk.
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters is an engine-creation option; passing it here
            # keeps bound parameter values out of logs and error messages
            # for BOTH engines.
            self.engine1 = create_async_engine(
                self.connection_string1,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine2 = create_async_engine(
                self.connection_string2,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )


        async def query(self, sql_query, engine=1):
            """
            This allows abstraction of the connection and the class
            so the developer can query a single table as a dataframe

            Args:
                sql_query: The SQL statement to execute (the queries in this
                    file are wrapped in sqlalchemy ``text()``)
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The SQL query is returned as a pandas DataFrame. When the
                    query matches no rows the frame is empty but still carries
                    the result's column names.

            Raises:
                ValueError: If engine is not 1 or 2

            Usage:
                df = await db_handler.query(text("SELECT * FROM DB.TABLE"), engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                rows = result.fetchall()
                # Always attach the column names, even for zero rows, so
                # callers can filter on columns without hitting a KeyError.
                df = pd.DataFrame(rows, columns=list(result.keys()))
            return df

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """
        Run every query concurrently and return {key: DataFrame}.

        Args:
            queries (list[dict]): Each item has 'key', 'sql' and 'engine'.

        The handler's engines are always disposed when this returns or raises,
        so db_handler cannot be reused afterwards.
        """
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return dict(zip(tasks.keys(), results))
        except Exception as e:
            # Report what actually failed before re-raising to the caller.
            print(f"Error: {type(e).__name__}: {e}")
            raise
        finally:
            await db_handler.close()

    def run_sql_queries():
        """Define the report queries and execute them, returning {key: DataFrame}."""
        # acctcommon
        # engine 2 (COCC DataMart): month-end snapshots from the last 24 months
        acctcommon = text("""
        SELECT 
            a.ACCTNBR,
            a.MJACCTTYPCD,
            a.CURRMIACCTTYPCD,
            a.PRODUCT,
            a.OWNERSORTNAME,
            a.CURRACCTSTATCD,
            a.CONTRACTDATE,
            a.DATEMAT,
            a.BOOKBALANCE,
            a.NOTEINTRATE,
            a.TAXRPTFORORGNBR,
            a.TAXRPTFORPERSNBR,
            a.BRANCHNAME,
            a.ACCTOFFICER,
            a.EFFDATE
        FROM 
            COCCDM.WH_ACCTCOMMON a
        WHERE
            a.MONTHENDYN = 'Y'
            AND a.EFFDATE >= ADD_MONTHS(SYSDATE, -24)
        """)

        queries = [
            {'key':'acctcommon', 'sql':acctcommon, 'engine':2},
        ]

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running (e.g. plain script): let asyncio manage one.
            return asyncio.run(fetch_data(queries))
        # A loop is already running (e.g. inside Jupyter); re-enter it.
        return loop.run_until_complete(fetch_data(queries))

    data = run_sql_queries()

    return data

In [24]:
# Run the configured queries; the result is a dict of DataFrames keyed by query name.
data = retrieve_data()

In [25]:
# Work on a copy so the frame held in `data` stays untouched by the filters below.
acctcommon = data['acctcommon'].copy()

In [26]:
# Keep only rows whose current account status code is ACT or DORM.
acctcommon = acctcommon.loc[
    acctcommon['curracctstatcd'].isin(['ACT', 'DORM'])
]

In [27]:
# Keep only rows whose major account type code is CK, SAV or TD.
acctcommon = acctcommon.loc[
    acctcommon['mjaccttypcd'].isin(['CK', 'SAV', 'TD'])
]

In [28]:
# Keep only rows that have a tax-reporting organization number populated.
acctcommon = acctcommon.loc[acctcommon['taxrptfororgnbr'].notna()]

In [29]:
# Check dtypes and non-null counts after the status, account-type and org-number filters.
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 204366 entries, 270 to 3697821
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   acctnbr           204366 non-null  int64         
 1   mjaccttypcd       204366 non-null  object        
 2   currmiaccttypcd   204366 non-null  object        
 3   product           204366 non-null  object        
 4   ownersortname     204366 non-null  object        
 5   curracctstatcd    204366 non-null  object        
 6   contractdate      204354 non-null  datetime64[ns]
 7   datemat           10355 non-null   datetime64[ns]
 8   bookbalance       204366 non-null  object        
 9   noteintrate       204366 non-null  object        
 10  taxrptfororgnbr   204366 non-null  float64       
 11  taxrptforpersnbr  0 non-null       float64       
 12  branchname        204366 non-null  object        
 13  acctofficer       204216 non-null  object        
 14  effdat

In [30]:
# Show the rows that have a maturity date populated.
acctcommon.loc[acctcommon['datemat'].notna()]

Unnamed: 0,acctnbr,mjaccttypcd,currmiaccttypcd,product,ownersortname,curracctstatcd,contractdate,datemat,bookbalance,noteintrate,taxrptfororgnbr,taxrptforpersnbr,branchname,acctofficer,effdate
1133,101877,TD,CD64,5 Year Prime Time CD,DANTE ALDOVER RAMOS IRREV TRUS,ACT,2003-08-07,2027-07-06,36509.2,0.01,1010418.0,,BCSB - CUMBERLAND,SHAQUITA L. WILSON,2023-01-31
1146,101877,TD,CD64,5 Year Prime Time CD,DANTE ALDOVER RAMOS IRREV TRUS,ACT,2003-08-07,2027-07-06,36540.22,0.01,1010418.0,,BCSB - CUMBERLAND,SHAQUITA L. WILSON,2023-02-28
1147,101877,TD,CD64,5 Year Prime Time CD,DANTE ALDOVER RAMOS IRREV TRUS,ACT,2003-08-07,2027-07-06,36568.26,0.01,1010418.0,,BCSB - CUMBERLAND,SHAQUITA L. WILSON,2023-03-31
2543,103236,TD,CD38,2 Year Business CD,THE MARY A WODZIAK TRUST,ACT,2004-09-13,2024-08-12,135579.79,0.015,1011448.0,,BCSB - GREENVILLE,TIFFANY J. CAHILL,2023-01-31
2544,103236,TD,CD38,2 Year Business CD,THE MARY A WODZIAK TRUST,ACT,2004-09-13,2024-08-12,135752.62,0.015,1011448.0,,BCSB - GREENVILLE,TIFFANY J. CAHILL,2023-02-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3644243,151131621,TD,CD13,3 Month Prime Time CD,ROBERT AND MARILYN GREENE SUPPLEMENTAL NEEDS T...,ACT,2024-12-27,2025-03-28,185255.27,0.0415,1017603.0,,BCSB - REHOBOTH BRANCH,FRANK P. WILHELM,2024-12-31
3644339,151132695,TD,CD07,3 Month Business CD,"ATTLEBORO AREA INDUSTRIAL MUSEUM, INC.",ACT,2024-12-31,2025-04-01,25000,0.0415,1002768.0,,BCSB - ATTLEBORO BRANCH,MICHAEL A. HEY,2024-12-31
3663629,3550000593,TD,CD35,1 Year Business CD,CASTLE MORTGAGE BROKERAGE INC,ACT,2008-08-20,2025-06-19,69934.04,0.045,1000611.0,,BCSB - NB ASHLEY BLVD BRANCH,MARLENE C. LIRA,2024-10-31
3663630,3550000593,TD,CD35,1 Year Business CD,CASTLE MORTGAGE BROKERAGE INC,ACT,2008-08-20,2025-06-19,70201.08,0.045,1000611.0,,BCSB - NB ASHLEY BLVD BRANCH,MARLENE C. LIRA,2024-11-29


- Here we can see that the records with a value in the 'datemat' field are time deposits (bank CDs).

In [31]:
# List the distinct product names still present after the filters above.
acctcommon['product'].unique()

array(['Community Checking', 'Simple Business Checking',
       'Business Money Market', 'Business Checking',
       'Business Premium Plus MoneyMkt', 'Statement Savings',
       'Muni Money Market', 'Prime Time Platinum Money Mkt',
       '5 Year Prime Time CD', 'Municipal Now', 'Prime Time Checking',
       'Prime Time Elite Money Market', 'Money Market Statement',
       'Business Elite Money Market', '2 Year Business CD',
       '1st Choice Business Checking', '15 Month Prime Time CD',
       'Municipal Money Market', 'BCSB High Yield Savings ',
       'IOLTA Checking', 'BCSB High Yield Checking ',
       '1 Year Business CD', 'BCSB Business High Yield MMKT',
       '2 Year Municipal CD', '1 Year Municipal CD',
       'eChecking (18 & over)', 'Business >12 Month CmpDly CD',
       'Business <12 Month Simple CD', '6 Month CD', 'Personal Checking',
       '6 Month Business CD', '15 Month Business CD',
       '3 Year Business CD', '3 Month Business CD',
       '6 Month Business CD - C

In [32]:
# Restrict to the five requested checking products; .copy() detaches the
# result from the larger frame it was sliced from.
acctcommon = acctcommon.loc[
    acctcommon['product'].isin([
        'Simple Business Checking',
        '1st Choice Business Checking',
        'Business Checking',
        'Community Checking',
        'IOLTA Checking',
    ])
].copy()

In [33]:
# Confirm which product names remain after the product filter.
acctcommon['product'].unique()

array(['Community Checking', 'Simple Business Checking',
       'Business Checking', '1st Choice Business Checking',
       'IOLTA Checking'], dtype=object)

### Skipping Business TIN because we can identify unique customers more reliably using the true ORGNBR linked to active accounts

In [34]:
# Re-check row counts and dtypes after the product filter.
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 116833 entries, 270 to 3691762
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   acctnbr           116833 non-null  int64         
 1   mjaccttypcd       116833 non-null  object        
 2   currmiaccttypcd   116833 non-null  object        
 3   product           116833 non-null  object        
 4   ownersortname     116833 non-null  object        
 5   curracctstatcd    116833 non-null  object        
 6   contractdate      116821 non-null  datetime64[ns]
 7   datemat           0 non-null       datetime64[ns]
 8   bookbalance       116833 non-null  object        
 9   noteintrate       116833 non-null  object        
 10  taxrptfororgnbr   116833 non-null  float64       
 11  taxrptforpersnbr  0 non-null       float64       
 12  branchname        116833 non-null  object        
 13  acctofficer       116764 non-null  object        
 14  effdat

In [35]:
# Snapshot the filtered frame under the name used by the export cell.
df = acctcommon.copy()

In [36]:
# Write the filtered accounts to the report's output workbook (no index column).
file_path = r'Z:\Chad Projects\Ad Hoc Reports\BusinessDepositAccts_Dave_121324\Production\Output\business_deposits_tin_raw_data.xlsx'
df.to_excel(
    file_path,
    index=False,
    engine='openpyxl',
)

- Complete. Awaiting feedback.