In [17]:
# Report Request - Dave Medieros & Stephanie Nordberg (Business Deposit Accts)
# Developed by CD
# v1.0.3

### Notes:
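Dave needs the report filtered to only the following products:
- Simple Bus Checking (appears in the data as "Simple Business Checking")
- 1st Choice Checking (appears in the data as "1st Choice Business Checking")
- Business Checking
- IOLTA Checking
- Community Checking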


In [1]:


import os
from io import StringIO
from datetime import datetime, timedelta, date
from sqlalchemy import create_engine, text
import pandas as pd
import time
from cryptography.fernet import Fernet
from dotenv import load_dotenv
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
import nest_asyncio

nest_asyncio.apply()


def retrieve_data():
    """
    Retrieve the source tables for this report from the COCC database.

    Returns:
        dict: Maps each query key ('lookup_df', 'acctcommon') to the pandas
        DataFrame returned by that query.
    """
    class DatabaseHandler:
        """
        Wraps the two async SQLAlchemy engines behind a small interface that
        returns query results as pandas DataFrames.
        """
        def __init__(self, tns_admin_path):
            """
            Decrypt the stored credentials and create one async engine per database.

            Args:
                tns_admin_path (str): Directory assigned to the TNS_ADMIN
                    environment variable.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load the Fernet key used to decrypt the credentials file
            key_key_path = r'C:\Users\w322800\Documents\coding3\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load and decrypt the encrypted .env contents
            encoded_env_path = r'C:\Users\w322800\Documents\coding3\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to python-dotenv without writing it to disk
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters is passed at engine creation for BOTH engines.
            # The previous code assigned the flag on engine1's dialect twice
            # and never configured engine2.
            self.engine1 = create_async_engine(
                self.connection_string1,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine2 = create_async_engine(
                self.connection_string2,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )


        async def query(self, sql_query, engine=1):
            """
            Run a single query and return its result as a DataFrame.

            Args:
                sql_query: Executable statement; the callers in this function
                    pass sqlalchemy text() objects
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The query result as a pandas DataFrame. A result with no
                rows still carries the query's column names.

            Raises:
                ValueError: If engine is not 1 or 2.

            Usage:
                df = await db_handler.query(text("SELECT * FROM DB.TABLE"), engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                columns = list(result.keys())
                rows = result.fetchall()

            # Always attach the column names so an empty result keeps its schema
            # and downstream column lookups do not fail with KeyError.
            return pd.DataFrame(rows, columns=columns)

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'C:\Users\w322800\Documents\coding3\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """
        Run every query concurrently and return {key: DataFrame}.

        The engines are disposed afterwards whether or not a query failed.
        """
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return {key: df for key, df in zip(tasks.keys(), results)}
        finally:
            await db_handler.close()

    def run_sql_queries():
        """Define the SQL for this report and execute it on a new event loop."""
        # lookup table
        # Engine 1
        lookup_df = text("""
        SELECT 
            *
        FROM 
            sys.all_tab_columns col
        """)

        # acctcommon
        # engine 1
        acctcommon = text("""
        SELECT 
            a.ACCTNBR,
            a.MJACCTTYPCD,
            a.CURRMIACCTTYPCD,
            a.PRODUCT,
            a.OWNERSORTNAME,
            a.CURRACCTSTATCD,
            a.CONTRACTDATE,
            a.DATEMAT,
            a.BOOKBALANCE,
            a.NOTEINTRATE,
            a.TAXRPTFORORGNBR,
            a.TAXRPTFORPERSNBR,
            a.BRANCHNAME
        FROM 
            OSIBANK.WH_ACCTCOMMON a
        """)

        queries = [
            {'key':'lookup_df', 'sql':lookup_df, 'engine':1},
            {'key':'acctcommon', 'sql':acctcommon, 'engine':1}

        ]
        return asyncio.run(fetch_data(queries))
    
    data = run_sql_queries()
    return data

In [2]:
# Run the queries defined in retrieve_data(); `data` maps each query key to a DataFrame.
data = retrieve_data()

In [11]:
# Work on an independent (deep) copy so the retrieved frame in `data` stays untouched.
acctcommon = data['acctcommon'].copy(deep=True)

In [12]:
# Keep only accounts whose current status code is ACT or DORM
# (presumably active / dormant -- confirm against the code table).
acctcommon = acctcommon.loc[
    lambda frame: frame['curracctstatcd'].isin(['ACT', 'DORM'])
]

In [13]:
# Keep only the major account types CK, SAV and TD.
acctcommon = acctcommon.loc[
    lambda frame: frame['mjaccttypcd'].isin(['CK', 'SAV', 'TD'])
]

In [14]:
# Keep only rows that have an organization number for tax reporting.
acctcommon = acctcommon.loc[
    lambda frame: frame['taxrptfororgnbr'].notna()
]

In [7]:
# Inspect column dtypes and non-null counts of the filtered frame.
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8893 entries, 48 to 163140
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   acctnbr           8893 non-null   int64         
 1   mjaccttypcd       8893 non-null   object        
 2   currmiaccttypcd   8893 non-null   object        
 3   product           8893 non-null   object        
 4   ownersortname     8893 non-null   object        
 5   curracctstatcd    8893 non-null   object        
 6   contractdate      8892 non-null   datetime64[ns]
 7   datemat           507 non-null    datetime64[ns]
 8   bookbalance       8893 non-null   object        
 9   noteintrate       8893 non-null   object        
 10  taxrptfororgnbr   8893 non-null   float64       
 11  taxrptforpersnbr  0 non-null      float64       
 12  branchname        8893 non-null   object        
dtypes: datetime64[ns](2), float64(2), int64(1), object(8)
memory usage: 972.7+ KB


In [9]:
# Show the rows that have a maturity date populated.
acctcommon.loc[lambda frame: frame['datemat'].notna()]

- The records with a value in the 'datemat' field are time deposits (bank CDs).

In [17]:
# List the distinct product names present before the product filter is applied.
acctcommon['product'].unique()

array(['Prime Time Elite Money Market', 'Business Elite Money Market',
       'Simple Business Checking', 'Business Checking',
       'Community Checking', '1st Choice Business Checking',
       'Business Money Market', 'NOW Checking', 'Statement Savings',
       'IOLTA Checking', 'Municipal Money Market',
       'BCSB High Yield Checking ', 'Municipal Now', 'Muni Money Market',
       '5 Year Prime Time CD', 'Prime Time Checking', 'Personal Checking',
       '1 Year Municipal CD', 'Business >12 Month CmpDly CD',
       '1 Year Business CD', '2 Year Business CD', '1 Year CD',
       'Municipal Checking', '9 MONTH MUNICIPAL CD',
       'Money Market Statement', '3 Year Business CD',
       '6 Month Business CD', '1 Year Prime Time CD',
       '3 Month Business CD', '9 Month Business CD',
       '6 Month Prime Time CD', '9 Month Prime Time CD',
       '4 Year Business CD', '5 Year Business CD', '3 Month Municipal CD',
       '2 Year CD', '2 Year Municipal CD', '6 Month Municipal CD',
   

In [18]:
# Restrict to the five checking products requested for this report, then copy
# so later changes do not act on a slice of the previous frame.
acctcommon = acctcommon.loc[
    lambda frame: frame['product'].isin([
        'Simple Business Checking',
        '1st Choice Business Checking',
        'Business Checking',
        'Community Checking',
        'IOLTA Checking',
    ])
].copy()

In [19]:
# Confirm which distinct product names remain after the product filter.
acctcommon['product'].unique()

array(['Simple Business Checking', 'Business Checking',
       'Community Checking', '1st Choice Business Checking',
       'IOLTA Checking'], dtype=object)

### Next step: retrieve each business's Tax ID and append it to the account data

In [20]:
# Re-check row count, dtypes and non-null counts after the product filter.
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5029 entries, 78 to 163130
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   acctnbr           5029 non-null   int64         
 1   mjaccttypcd       5029 non-null   object        
 2   currmiaccttypcd   5029 non-null   object        
 3   product           5029 non-null   object        
 4   ownersortname     5029 non-null   object        
 5   curracctstatcd    5029 non-null   object        
 6   contractdate      5028 non-null   datetime64[ns]
 7   datemat           0 non-null      datetime64[ns]
 8   bookbalance       5029 non-null   object        
 9   noteintrate       5029 non-null   object        
 10  taxrptfororgnbr   5029 non-null   float64       
 11  taxrptforpersnbr  0 non-null      float64       
 12  branchname        5029 non-null   object        
dtypes: datetime64[ns](2), float64(2), int64(1), object(8)
memory usage: 550.0+ KB


In [21]:
# Async SQLConnector
# Developed by CD
# v0.0.1

# Improved connector to SQL database that can concurrently access tables instead of processing them sequentially

import os
from io import StringIO
from datetime import datetime, timedelta, date
from sqlalchemy import create_engine, text
import pandas as pd
import time
from cryptography.fernet import Fernet
from dotenv import load_dotenv
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
import nest_asyncio

nest_asyncio.apply()


def retrieve_data():
    """
    Retrieve the tax-ID and account-officer tables from the COCC database.

    Returns:
        dict: Maps each query key ('vieworgtaxid', 'acctcommon2') to the
        pandas DataFrame returned by that query.
    """
    class DatabaseHandler:
        """
        Wraps the two async SQLAlchemy engines behind a small interface that
        returns query results as pandas DataFrames.
        """
        def __init__(self, tns_admin_path):
            """
            Decrypt the stored credentials and create one async engine per database.

            Args:
                tns_admin_path (str): Directory assigned to the TNS_ADMIN
                    environment variable.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load the Fernet key used to decrypt the credentials file
            key_key_path = r'C:\Users\w322800\Documents\coding3\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load and decrypt the encrypted .env contents
            encoded_env_path = r'C:\Users\w322800\Documents\coding3\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to python-dotenv without writing it to disk
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters is passed at engine creation for BOTH engines.
            # The previous code assigned the flag on engine1's dialect twice
            # and never configured engine2.
            self.engine1 = create_async_engine(
                self.connection_string1,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine2 = create_async_engine(
                self.connection_string2,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )


        async def query(self, sql_query, engine=1):
            """
            Run a single query and return its result as a DataFrame.

            Args:
                sql_query: Executable statement; the callers in this function
                    pass sqlalchemy text() objects
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The query result as a pandas DataFrame. A result with no
                rows still carries the query's column names.

            Raises:
                ValueError: If engine is not 1 or 2.

            Usage:
                df = await db_handler.query(text("SELECT * FROM DB.TABLE"), engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                columns = list(result.keys())
                rows = result.fetchall()

            # Always attach the column names so an empty result keeps its schema
            # and downstream merges on those columns do not fail with KeyError.
            return pd.DataFrame(rows, columns=columns)

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'C:\Users\w322800\Documents\coding3\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """
        Run every query concurrently and return {key: DataFrame}.

        The engines are disposed afterwards whether or not a query failed.
        """
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return {key: df for key, df in zip(tasks.keys(), results)}
        finally:
            await db_handler.close()

    def run_sql_queries():
        """Define the SQL for this step and execute it on a new event loop."""
        # vieworgtaxid: organization number -> tax ID
        # engine 1
        vieworgtaxid = text("""
        SELECT 
            a.ORGNBR,
            a.TAXID
        FROM 
            OSIBANK.VIEWORGTAXID a
        """)

        # acctcommon2: account number -> account officer
        # engine 1
        acctcommon2 = text("""
        SELECT
            a.ACCTNBR,
            a.ACCTOFFICER
        FROM
            OSIBANK.WH_ACCTCOMMON a
        """)

        queries = [
            {'key':'vieworgtaxid', 'sql':vieworgtaxid, 'engine':1},
            {'key':'acctcommon2', 'sql':acctcommon2, 'engine':1}

        ]
        return asyncio.run(fetch_data(queries))
    
    data = run_sql_queries()
    return data

In [22]:
# Run the redefined retrieve_data(); note this overwrites the `data` dict from the
# first retrieval, so the earlier data['acctcommon'] lookup cannot be re-run afterwards.
data = retrieve_data()

In [23]:
# Take independent copies of the two retrieved tables.
vieworgtaxid, acctcommon2 = (
    data[key].copy() for key in ('vieworgtaxid', 'acctcommon2')
)

In [24]:
# Attach the tax ID to each account by matching the account's tax-reporting
# organization number to the organization number in vieworgtaxid.
# Left join: accounts without a match are kept with empty orgnbr/taxid.
df = acctcommon.merge(
    vieworgtaxid,
    how='left',
    left_on='taxrptfororgnbr',
    right_on='orgnbr',
)

In [25]:
# Attach the account officer to each account by account number (left join).
df = df.merge(acctcommon2, how='left', on='acctnbr')

In [26]:
# Display the merged result.
# NOTE(review): the rendered output contains owner names and tax IDs --
# clear this cell's output before sharing or committing the notebook.
df

Unnamed: 0,acctnbr,mjaccttypcd,currmiaccttypcd,product,ownersortname,curracctstatcd,contractdate,datemat,bookbalance,noteintrate,taxrptfororgnbr,taxrptforpersnbr,branchname,orgnbr,taxid,acctofficer
0,103344,CK,CK25,Simple Business Checking,THE PAWS GROUP LLC,ACT,2007-03-29,NaT,297282.96,0,1010449.0,,BCSB - GREENVILLE,1010449.0,208634880,LAURA A. STACK
1,102372,CK,CK25,Simple Business Checking,FENJA LLC,ACT,2005-03-07,NaT,5927.44,0,1010401.0,,BCSB - CUMBERLAND,1010401.0,753184096,JUSTIN A. JEFFREY
2,103072,CK,CK25,Simple Business Checking,GREENVILLE COMMON PROPERTIES,ACT,2006-06-15,NaT,4565.63,0,1010487.0,,BCSB - GREENVILLE,1010487.0,100001450,TIFFANY J. CAHILL
3,102488,CK,CK25,Simple Business Checking,PICKLES PLUMBING & HEATING LLC,ACT,2005-04-27,NaT,9303.81,0,1010387.0,,BCSB - GREENVILLE,1010387.0,261676023,TIFFANY J. CAHILL
4,102824,CK,CK25,Simple Business Checking,N R BEAUDETTE & SONS CONT,ACT,2005-12-29,NaT,5940.35,0,1010645.0,,BCSB - GREENVILLE,1010645.0,039449390,TIFFANY J. CAHILL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5024,27027848,CK,CK12,Business Checking,COLONIAL FORD,ACT,2018-02-12,NaT,3400,0,554.0,,BCSB - MAIN OFFICE,554.0,043575965,JACQUELINE A. THEIS
5025,151118984,CK,CK25,Simple Business Checking,TWO NINETEEN ARNOLD LLC,ACT,2024-11-08,NaT,3000,0,1017435.0,,BCSB - CANDLEWORKS BRANCH,1017435.0,993337754,ANDREW RODRIGUES
5026,151117960,CK,CK25,Simple Business Checking,"REP SOCIETY, LLC",ACT,2024-11-06,NaT,80260.7,0,1017422.0,,BCSB - MAIN OFFICE,1017422.0,331696099,JEFFREY M. VIALL
5027,151118033,CK,CK25,Simple Business Checking,LIGHTHOUSE WEST TRUST LLC,ACT,2024-11-06,NaT,938.82,0,1017424.0,,BCSB - CANDLEWORKS BRANCH,1017424.0,331821700,MONICA A. FURTADO


In [27]:
# Destination workbook for the raw report data.
file_path = r'Z:\Chad Projects\Ad Hoc Reports\BusinessDepositAccts_Dave_121324\Production\Output\business_deposits_tin_raw_data.xlsx'

# Write the merged frame without the pandas index column.
df.to_excel(
    file_path,
    index=False,
    engine='openpyxl',
)

- Complete. Awaiting feedback.