In [1]:
# Report Request - Dave Medieros & Stephanie Nordberg (Business Deposit Accts)
# Developed by CD
# v1.0.0

In [18]:
# Async SQLConnector
# Developed by CD
# v0.0.1

# Improved connector to SQL database that can concurrently access tables instead of processing them sequentially

import os
from io import StringIO
from datetime import datetime, timedelta, date
from sqlalchemy import create_engine, text
import pandas as pd
import time
from cryptography.fernet import Fernet
from dotenv import load_dotenv
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
import nest_asyncio

nest_asyncio.apply()


def retrieve_data():
    """
    Retrieve data from the COCC database.

    Decrypts the stored credentials, creates async engines for both databases,
    runs every query listed in ``run_sql_queries`` concurrently and disposes
    of the engines afterwards.

    Returns:
        dict: Maps each query's 'key' (currently only 'acctcommon') to the
        pandas DataFrame holding that query's result.
    """
    class DatabaseHandler:
        """
        This class abstracts the connection to the database and allows a clean
        interface for the developer to use.

        This connector can handle async queries.
        """
        def __init__(self, tns_admin_path):
            """
            Load the encrypted credentials and build one async engine per database.

            Args:
                tns_admin_path (str): Oracle driver path; exported as the
                    TNS_ADMIN environment variable.

            Note:
                The key file and the encrypted .env file are read from fixed
                paths. The decrypted values are handed to load_dotenv and then
                read back through os.getenv.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load private key
            key_key_path = r'C:\Users\w322800\Documents\coding3\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load encrypted data
            encoded_env_path = r'C:\Users\w322800\Documents\coding3\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to dotenv from memory so the plaintext
            # credentials are never written to disk.
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters is an engine-creation option, so it is passed
            # here for BOTH engines to keep bound values out of logs and
            # error messages.
            engine_options = dict(
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine1 = create_async_engine(self.connection_string1, **engine_options)
            self.engine2 = create_async_engine(self.connection_string2, **engine_options)

        async def query(self, sql_query, engine=1):
            """
            This allows abstraction of the connection and the class
            so the developer can query a single table as a dataframe

            Args:
                sql_query (str | sqlalchemy executable): The query to run.
                    A plain string is wrapped in ``text()`` before execution.
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The SQL query is returned as a pandas DataFrame. An empty
                result still carries the query's column names.

            Raises:
                ValueError: If ``engine`` is neither 1 nor 2.

            Usage:
                df = await db_handler.query("SELECT * FROM DB.TABLE", engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            # Connection.execute() needs an executable object, not a raw string.
            if isinstance(sql_query, str):
                sql_query = text(sql_query)

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                columns = list(result.keys())
                rows = result.fetchall()

            # Always pass the column names so callers can filter on columns
            # even when the query returned no rows.
            return pd.DataFrame(rows, columns=columns)

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'C:\Users\w322800\Documents\coding3\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """
        Run all queries concurrently and collect the results by key.

        Args:
            queries (list[dict]): Each entry has 'key', 'sql' and 'engine'.

        Returns:
            dict: query key -> DataFrame.
        """
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return {key: df for key, df in zip(tasks.keys(), results)}
        finally:
            # Engines are disposed whether or not the queries succeeded.
            await db_handler.close()

    def run_sql_queries():
        """Define the SQL statements for this report and execute them."""
        # lookup table
        # Engine 1
        # Not executed: its entry in `queries` below is commented out.
        lookup_df = text("""
        SELECT 
            *
        FROM 
            sys.all_tab_columns col
        """)

        # acctcommon
        # engine 1
        acctcommon = text("""
        SELECT 
            a.ACCTNBR,
            a.MJACCTTYPCD,
            a.CURRMIACCTTYPCD,
            a.PRODUCT,
            a.OWNERSORTNAME,
            a.CURRACCTSTATCD,
            a.CONTRACTDATE,
            a.DATEMAT,
            a.BOOKBALANCE,
            a.NOTEINTRATE,
            a.TAXRPTFORORGNBR,
            a.TAXRPTFORPERSNBR
        FROM 
            OSIBANK.WH_ACCTCOMMON a
        """)

        queries = [
            # {'key':'lookup_df', 'sql':lookup_df, 'engine':1},
            {'key':'acctcommon', 'sql':acctcommon, 'engine':1}

        ]
        return asyncio.run(fetch_data(queries))

    data = run_sql_queries()
    return data

In [19]:
# Run the report queries; `data` is a dict of DataFrames keyed by query name.
data = retrieve_data()

In [20]:
# Work on a copy so the filters below leave the frame stored in `data` untouched.
acctcommon = data['acctcommon'].copy()

In [21]:
# Keep only accounts whose current status code is ACT or DORM
# (presumably active and dormant -- confirm against the status code table).
acctcommon = acctcommon.loc[acctcommon['curracctstatcd'].isin(['ACT','DORM'])]

In [22]:
# Keep only the CK, SAV and TD major account types
# (presumably checking, savings and time deposits -- confirm).
acctcommon = acctcommon.loc[acctcommon['mjaccttypcd'].isin(['CK','SAV','TD'])]

In [23]:
# Keep only rows that have a tax-reporting organization number.
acctcommon = acctcommon[acctcommon['taxrptfororgnbr'].notna()]

In [24]:
# Check row count, dtypes and null counts after the three filters above.
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8896 entries, 23 to 162662
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   acctnbr           8896 non-null   int64         
 1   mjaccttypcd       8896 non-null   object        
 2   currmiaccttypcd   8896 non-null   object        
 3   product           8896 non-null   object        
 4   ownersortname     8896 non-null   object        
 5   curracctstatcd    8896 non-null   object        
 6   contractdate      8895 non-null   datetime64[ns]
 7   datemat           511 non-null    datetime64[ns]
 8   bookbalance       8896 non-null   object        
 9   noteintrate       8896 non-null   object        
 10  taxrptfororgnbr   8896 non-null   float64       
 11  taxrptforpersnbr  0 non-null      float64       
dtypes: datetime64[ns](2), float64(2), int64(1), object(7)
memory usage: 903.5+ KB


In [25]:
# Show the accounts that have a maturity date.
acctcommon[acctcommon['datemat'].notna()]

Unnamed: 0,acctnbr,mjaccttypcd,currmiaccttypcd,product,ownersortname,curracctstatcd,contractdate,datemat,bookbalance,noteintrate,taxrptfororgnbr,taxrptforpersnbr
157,7105320,TD,CD31,1 Year Business CD,HAROLD C ENGLAND JR IRRV TR,ACT,2015-05-27,2025-06-08,21726.84,0.045,1005405.0,
495,151052306,TD,CD17,6 Month Business CD,"JAYSAN GAS SERVICE, INC.",ACT,2024-05-21,2025-05-20,21709.94,0.042,1008723.0,
1526,7203622,TD,CD59,5 Year Business CD,ANAWAN-OAKTON GRANGE #221 P OF H INC,ACT,2001-01-25,2025-01-22,12056.18,0.0149,1002477.0,
1628,150289439,TD,CD25,6 Month Prime Time CD,CAROL ANN TEIXEIRA TRUST,ACT,2019-02-25,2025-02-22,102491.4,0.0475,1002839.0,
1773,150846528,TD,CD33,1 Year CD,MICHAEL EDWARD WATERMAN TR UW,ACT,2023-01-13,2025-04-14,184490.42,0.0425,1015218.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
160914,151065119,TD,CD75,9 MONTH MUNICIPAL CD,TOWN OF REHOBOTH,ACT,2024-06-18,2025-03-19,1000000,0.05,1002903.0,
161443,151013770,TD,CD17,6 Month Business CD,TAUNTON STOVE COMPANY INC,ACT,2024-02-29,2025-02-27,310748.83,0.0475,1000036.0,
161682,150832486,TD,CD30,1 Year Municipal CD,BRISTOL COMMUNITY COLLEGE,ACT,2022-12-19,2025-03-20,474184.45,0.045,1005422.0,
162317,151053784,TD,CD07,3 Month Business CD,"VALENTINE TOOL & STAMPING, INC.",ACT,2024-05-24,2025-02-21,759708.09,0.0445,1003329.0,


- Here we can see that the records with a value in the 'datemat' field are time deposits (bank CDs).

### Next, retrieve each business's tax ID so it can be appended to the account records

In [26]:
# Async SQLConnector
# Developed by CD
# v0.0.1

# Improved connector to SQL database that can concurrently access tables instead of processing them sequentially

import os
from io import StringIO
from datetime import datetime, timedelta, date
from sqlalchemy import create_engine, text
import pandas as pd
import time
from cryptography.fernet import Fernet
from dotenv import load_dotenv
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
import nest_asyncio

nest_asyncio.apply()


def retrieve_data():
    """
    Retrieve organization tax IDs from the COCC database.

    NOTE: this redefines the ``retrieve_data`` declared in an earlier cell;
    after this cell runs, the name refers to this version, which queries
    OSIBANK.VIEWORGTAXID instead of OSIBANK.WH_ACCTCOMMON.

    Decrypts the stored credentials, creates async engines for both databases,
    runs every query listed in ``run_sql_queries`` concurrently and disposes
    of the engines afterwards.

    Returns:
        dict: Maps each query's 'key' (currently only 'vieworgtaxid') to the
        pandas DataFrame holding that query's result.
    """
    class DatabaseHandler:
        """
        This class abstracts the connection to the database and allows a clean
        interface for the developer to use.

        This connector can handle async queries.
        """
        def __init__(self, tns_admin_path):
            """
            Load the encrypted credentials and build one async engine per database.

            Args:
                tns_admin_path (str): Oracle driver path; exported as the
                    TNS_ADMIN environment variable.

            Note:
                The key file and the encrypted .env file are read from fixed
                paths. The decrypted values are handed to load_dotenv and then
                read back through os.getenv.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load private key
            key_key_path = r'C:\Users\w322800\Documents\coding3\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load encrypted data
            encoded_env_path = r'C:\Users\w322800\Documents\coding3\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to dotenv from memory so the plaintext
            # credentials are never written to disk.
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters is an engine-creation option, so it is passed
            # here for BOTH engines to keep bound values out of logs and
            # error messages.
            engine_options = dict(
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine1 = create_async_engine(self.connection_string1, **engine_options)
            self.engine2 = create_async_engine(self.connection_string2, **engine_options)

        async def query(self, sql_query, engine=1):
            """
            This allows abstraction of the connection and the class
            so the developer can query a single table as a dataframe

            Args:
                sql_query (str | sqlalchemy executable): The query to run.
                    A plain string is wrapped in ``text()`` before execution.
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The SQL query is returned as a pandas DataFrame. An empty
                result still carries the query's column names.

            Raises:
                ValueError: If ``engine`` is neither 1 nor 2.

            Usage:
                df = await db_handler.query("SELECT * FROM DB.TABLE", engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            # Connection.execute() needs an executable object, not a raw string.
            if isinstance(sql_query, str):
                sql_query = text(sql_query)

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                columns = list(result.keys())
                rows = result.fetchall()

            # Always pass the column names so callers can merge or filter on
            # columns even when the query returned no rows.
            return pd.DataFrame(rows, columns=columns)

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'C:\Users\w322800\Documents\coding3\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """
        Run all queries concurrently and collect the results by key.

        Args:
            queries (list[dict]): Each entry has 'key', 'sql' and 'engine'.

        Returns:
            dict: query key -> DataFrame.
        """
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return {key: df for key, df in zip(tasks.keys(), results)}
        finally:
            # Engines are disposed whether or not the queries succeeded.
            await db_handler.close()

    def run_sql_queries():
        """Define the SQL statements for this report and execute them."""
        # lookup table
        # Engine 1
        # Not executed: its entry in `queries` below is commented out.
        lookup_df = text("""
        SELECT 
            *
        FROM 
            sys.all_tab_columns col
        """)

        # vieworgtaxid
        # engine 1
        vieworgtaxid = text("""
        SELECT 
            a.ORGNBR,
            a.TAXID
        FROM 
            OSIBANK.VIEWORGTAXID a
        """)

        queries = [
            # {'key':'lookup_df', 'sql':lookup_df, 'engine':1},
            {'key':'vieworgtaxid', 'sql':vieworgtaxid, 'engine':1}

        ]
        return asyncio.run(fetch_data(queries))

    data = run_sql_queries()
    return data

In [27]:
# Calls the redefined retrieve_data() (the VIEWORGTAXID query). This rebinds
# `data`, so the earlier account result is now reachable only via `acctcommon`.
data = retrieve_data()

In [29]:
# Copy the org-number -> tax-ID result out of `data` before using it.
vieworgtaxid = data['vieworgtaxid'].copy()

In [30]:
# Left join: every filtered account is kept and gains orgnbr/taxid where the
# tax-reporting organization number has a match.
df = acctcommon.merge(
    vieworgtaxid,
    how='left',
    left_on='taxrptfororgnbr',
    right_on='orgnbr',
)

In [31]:
# Display the merged accounts-with-tax-ID frame for a visual check.
df

Unnamed: 0,acctnbr,mjaccttypcd,currmiaccttypcd,product,ownersortname,curracctstatcd,contractdate,datemat,bookbalance,noteintrate,taxrptfororgnbr,taxrptforpersnbr,orgnbr,taxid
0,4465813007,SAV,SV02,Statement Savings,MILTON GEORGE ESTATE,ACT,2017-01-26,NaT,4326.08,0.0005,1004667.0,,1004667.0,817053757
1,27068560,CK,CK25,Simple Business Checking,BRAGA TRANSPORTATION INC,ACT,2014-08-08,NaT,1446.41,0,1001686.0,,1001686.0,042982782
2,63115506,CK,CK19,Business Money Market,COYLE HS 68 CLASS REUNION,DORM,2018-02-22,NaT,1433.43,0.0026,1005216.0,,1005216.0,824476708
3,150972779,CK,CK36,ICS Shadow - Muni - Demand,TOWN OF MEDWAY,ACT,2023-11-14,NaT,11256.34,0,1004141.0,,1004141.0,046001217
4,26181368,CK,CK13,Community Checking,CHARLES AND ROSEMARY MORGAN CORRIGAN,ACT,2002-06-20,NaT,33697.3,0.0005,1002829.0,,1002829.0,042760750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8891,150996539,CK,CK36,ICS Shadow - Muni - Demand,TOWN OF ACUSHNET,ACT,2024-01-19,NaT,155707.42,0,1003925.0,,1003925.0,046001063
8892,27069354,CK,CK13,Community Checking,DARTMOUTH FRIENDS OF THE ELDERLY INC,DORM,2014-04-08,NaT,10753.3,0.0005,1003611.0,,1003611.0,043033727
8893,60104023,CK,CK18,Municipal Money Market,CITY OF NEW BEDFORD,ACT,2010-05-17,NaT,0,0.035,1003923.0,,1003923.0,046001402
8894,151102987,CK,CK42,ICS Shadow - Muni - MMDA/SAV,TOWN OF EASTON,ACT,2024-09-20,NaT,97427.23,0,1003879.0,,1003879.0,046001142


In [32]:
# Write the merged frame to the report's output workbook, without the index column.
file_path = r'Z:\Chad Projects\Ad Hoc Reports\BusinessDepositAccts_Dave_121324\Production\Output\business_deposits_tin_raw_data.xlsx'
df.to_excel(
    excel_writer=file_path,
    index=False,
    engine='openpyxl',
)

- Complete. Awaiting feedback.