In [1]:
# Async Connector
# Developed by CD
# v2.0.0-prod

from io import StringIO
import time
import numpy as np
import os
from datetime import datetime, timedelta, date
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import text
from typing import List
from collections import defaultdict, Counter
import pandas as pd
from cryptography.fernet import Fernet
from dotenv import load_dotenv
from io import StringIO
from pathlib import Path
import asyncio
import nest_asyncio
import sys
from typing import Dict, Union, List
# Patch asyncio so an already-running event loop can be re-entered;
# retrieve_data() calls loop.run_until_complete() while a loop is running.
nest_asyncio.apply()

# On Windows, switch asyncio to the selector-based event loop policy.
# NOTE(review): the reason is not stated in this notebook — presumably driver
# compatibility; confirm before removing.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())


def retrieve_data(queries: List[Dict[str, Union[str, pd.DataFrame, int]]]) -> Dict[str, pd.DataFrame]:
    """
    Retrieve data from Oracle Database (COCC)

    All queries are submitted concurrently and the call blocks until every
    one of them has finished.

    Args:
        queries (List): one dict per query, with the items
            - 'key' (str): name under which the resulting DataFrame is returned
            - 'sql' (str or sqlalchemy text()): statement to execute
            - 'engine' (int, optional): 1 or 2, selects the database (default 1)

    Returns:
        data (Dict): Returns a dictionary with df name and the df attached as key/value pair.

    Raises:
        ValueError: if two queries share the same 'key', or an engine other
            than 1 or 2 is requested.
        Exception: any error raised while connecting or executing a query is
            re-raised after the remaining queries have been cancelled.
    """
    class DatabaseHandler:
        """
        This class abstracts the connection to the database and allows a clean
        interface for the developer to use.

        This connector can handle async queries

        """
        def __init__(self, tns_admin_path):
            """
            Decrypt the credentials file and build one async engine per database.

            Args:
                tns_admin_path (str): Oracle driver path (exported as TNS_ADMIN)

            The Fernet key and the encrypted .env file are read from the
            ``env_admin`` directory, relative to the current working directory.
            """
            os.environ['TNS_ADMIN'] = tns_admin_path

            # Load private key (raw string: '\k' is not a valid escape sequence)
            key_key_path = r'env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)

            # Load encrypted data
            encoded_env_path = r'env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            # Feed the decrypted text to python-dotenv without writing it to disk
            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            # NOTE(review): credentials are interpolated as-is. SQLAlchemy URLs
            # require special characters in the user/password to be
            # percent-encoded — confirm the stored values are URL-safe.
            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            # hide_parameters keeps bound parameter values out of logs and error
            # messages. It is passed at creation time for BOTH engines; the
            # previous code only ever set the flag on engine1's dialect.
            self.engine1 = create_async_engine(
                self.connection_string1,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )
            self.engine2 = create_async_engine(
                self.connection_string2,
                max_identifier_length=128,
                echo=False,
                future=True,
                hide_parameters=True,
            )

        async def query(self, sql_query, engine=1):
            """
            This allows abstraction of the connection and the class
            so the developer can query a single table as a dataframe

            Args:
                sql_query (str or sqlalchemy text()): The query to run. Plain
                    strings are wrapped in text() before execution.
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The SQL query is returned as a pandas DataFrame. An empty
                    result set yields an empty DataFrame that still carries
                    the result's column names.

            Raises:
                ValueError: if engine is not 1 or 2

            Usage:
                df = await db_handler.query("SELECT * FROM DB.TABLE", engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            # Raw strings are not executable in SQLAlchemy 2.x; wrap them.
            statement = text(sql_query) if isinstance(sql_query, str) else sql_query

            async with selected_engine.connect() as connection:
                result = await connection.execute(statement)
                columns = list(result.keys())
                rows = result.fetchall()

            # Built outside the connection block: rows are already in memory.
            return pd.DataFrame(rows, columns=columns)

        async def close(self):
            """Dispose of both engines, releasing their pooled connections."""
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        """Run every query concurrently; always dispose of the engines afterwards."""
        tasks = {}
        try:
            for query in queries:
                key = query['key']
                if key in tasks:
                    # A repeated key would orphan the earlier task.
                    raise ValueError(f"Duplicate query key: {key!r}")
                tasks[key] = asyncio.create_task(
                    db_handler.query(query['sql'], query.get('engine', 1))
                )
            results = await asyncio.gather(*tasks.values())
            return dict(zip(tasks.keys(), results))
        except Exception as e:
            # Stop the remaining queries before the engines are disposed below,
            # and wait for the cancellations to settle.
            for task in tasks.values():
                task.cancel()
            await asyncio.gather(*tasks.values(), return_exceptions=True)
            print(f"Error while running queries {list(tasks)}: {type(e).__name__}: {e}")
            raise
        finally:
            await db_handler.close()

    def run_sql_queries(queries):
        """Drive fetch_data() to completion from synchronous code."""
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop is not None:
            # Inside Jupyter a loop is already running; nest_asyncio makes
            # re-entering it possible.
            return loop.run_until_complete(fetch_data(queries))
        return asyncio.run(fetch_data(queries))

    data = run_sql_queries(queries)

    return data

In [4]:
# Column-metadata lookup table: every row of sys.all_tab_columns
# Engine 2
lookup_sql = text("""
SELECT 
    *
FROM 
    sys.all_tab_columns col
""")

queries = [
    {'key':'lookup_df', 'sql':lookup_sql, 'engine':2},
]

data = retrieve_data(queries)
# Copy so later edits to lookup_df do not touch the frame held in `data`.
lookup_df = data['lookup_df'].copy()

In [5]:
# Display the column-metadata lookup frame (the notebook renders a truncated view).
lookup_df

Unnamed: 0,owner,table_name,column_name,data_type,data_type_mod,data_type_owner,data_length,data_precision,data_scale,nullable,...,char_used,v80_fmt_image,data_upgraded,histogram,default_on_null,identity_column,evaluation_edition,unusable_before,unusable_beginning,collation
0,XDB,XDB$IMPORT_TT_INFO,ID,RAW,,,8,,,Y,...,,NO,YES,NONE,NO,NO,,,,
1,XDB,XDB$IMPORT_TT_INFO,FLAGS,RAW,,,4,,,Y,...,,NO,YES,NONE,NO,NO,,,,
2,XDB,XDB$IMPORT_TT_INFO,LOCALNAME,VARCHAR2,,,2000,,,Y,...,B,NO,YES,NONE,NO,NO,,,,USING_NLS_COMP
3,XDB,XDB$IMPORT_TT_INFO,NMSPCID,RAW,,,8,,,Y,...,,NO,YES,NONE,NO,NO,,,,
4,XDB,XDB$IMPORT_TT_INFO,GUID,RAW,,,16,,,Y,...,,NO,YES,NONE,NO,NO,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28688,SYS,USER_XML_SCHEMA_ELEMENTS,GLOBAL,RAW,,,1,,,Y,...,,NO,YES,NONE,NO,NO,,,,
28689,SYS,KU$_ASSOC_VIEW,OBJ_TYPE,NUMBER,,,22,,,Y,...,,NO,YES,NONE,NO,NO,,,,
28690,SYS,EXU8VEWU,DEFER,NUMBER,,,22,,,Y,...,,NO,YES,NONE,NO,NO,,,,
28691,SYS,KU$_USER_VIEW,VERS_MAJOR,CHAR,,,1,,,Y,...,B,NO,YES,NONE,NO,NO,,,,USING_NLS_COMP


In [None]:
# Person table (OSIBANK.PERS)
# Engine 1
pers_sql = text("""
SELECT 
    *
FROM 
    OSIBANK.PERS
""")

# The key must match the name read back from `data` below; it was previously
# registered as 'acctcommon', which made data['pers'] raise KeyError.
queries = [
    {'key':'pers', 'sql':pers_sql, 'engine':1},
]

data = retrieve_data(queries)
# Copy so later edits to pers do not touch the frame held in `data`.
pers = data['pers'].copy()



In [5]:
# Display the PERS frame.
# NOTE(review): the rendered output includes person-name columns (lastname,
# firstname) — consider clearing it before sharing the notebook.
pers

Unnamed: 0,persnbr,spousepersnbr,restaxctrycd,crcd,salucd,lastname,lastnamesndx,firstname,firstnamesndx,mdlinit,...,firstnameupper,rpt1042syn,graddate,creditreportconsinfocd,privacyyn,purgeyn,shortname,prefculturecd,secondsurname,deathnotificationdate
0,127,,USA,,,COCC,C200,DEPOSIT8,D123,,...,DEPOSIT8,N,NaT,,N,N,,,,NaT
1,180,,USA,,,COCC,C200,ROCH2,R200,,...,ROCH2,N,NaT,,N,N,,,,NaT
2,181,,USA,,,COCC,C200,ROCH3,R200,,...,ROCH3,N,NaT,,N,N,,,,NaT
3,182,,USA,,,COCC,C200,ROCH4,R200,,...,ROCH4,N,NaT,,N,N,,,,NaT
4,184,,USA,,,COCC,C200,PRINT6,P653,,...,PRINT6,N,NaT,,N,N,,,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163959,1167967,,USA,,,RAMDHANY CORREA,R535,JESSIE,J200,,...,JESSIE,N,NaT,,N,N,,,,NaT
163960,1167973,,USA,,,MURPHY,M610,THOMAS,T520,G,...,THOMAS,N,NaT,,N,N,,,,NaT
163961,1167983,,USA,,,COLON TORRES,C453,VICTOR,V236,M,...,VICTOR,N,NaT,,N,N,,,,NaT
163962,1167985,,USA,,,DEFONTES,D153,JOHN,J500,R,...,JOHN,N,NaT,,N,N,,,,NaT


In [3]:
# Loan account table (OSIBANK.WH_ACCTLOAN)
# Engine 1
# The statement gets its own name so `acctloan` only ever refers to the DataFrame.
acctloan_sql = text("""
SELECT 
    *
FROM 
    OSIBANK.WH_ACCTLOAN
""")

queries = [
    {'key':'acctloan', 'sql':acctloan_sql, 'engine':1},
]

data = retrieve_data(queries)
# Copy so later edits to acctloan do not touch the frame held in `data`.
acctloan = data['acctloan'].copy()

In [None]:
# Display the WH_ACCTLOAN frame loaded by the cell above.
acctloan

In [3]:
# Columns whose name contains "credlimitclat" (case-insensitive; missing names excluded)
lookup_df.loc[lookup_df['column_name'].str.contains('credlimitclat', case=False, na=False)]

Unnamed: 0,owner,table_name,column_name,data_type,data_type_mod,data_type_owner,data_length,data_precision,data_scale,nullable,...,char_used,v80_fmt_image,data_upgraded,histogram,default_on_null,identity_column,evaluation_edition,unusable_before,unusable_beginning,collation
1629,COCCDM,WH_ACCTLOAN,CREDLIMITCLATRESEFFDATE,DATE,,,7,,,Y,...,,NO,YES,NONE,NO,NO,,,,
1630,COCCDM,WH_ACCTLOAN,CREDLIMITCLATRESAMT,NUMBER,,,22,22.0,3.0,Y,...,,NO,YES,NONE,NO,NO,,,,
1748,COCCDM,WH_ACCTLOAN_TEMP,CREDLIMITCLATRESEFFDATE,DATE,,,7,,,Y,...,,NO,YES,NONE,NO,NO,,,,
1749,COCCDM,WH_ACCTLOAN_TEMP,CREDLIMITCLATRESAMT,NUMBER,,,22,22.0,3.0,Y,...,,NO,YES,NONE,NO,NO,,,,
5621,COCCDM,WH_ACCTLOAN_ME,CREDLIMITCLATRESEFFDATE,DATE,,,7,,,Y,...,,NO,YES,NONE,NO,NO,,,,
5622,COCCDM,WH_ACCTLOAN_ME,CREDLIMITCLATRESAMT,NUMBER,,,22,22.0,3.0,Y,...,,NO,YES,NONE,NO,NO,,,,


In [4]:
# Metadata rows for every table whose name contains "prop" (case-insensitive)
prop_tables = lookup_df.loc[lookup_df['table_name'].str.contains('prop', case=False, na=False)]

In [6]:
# Distinct table names among the "prop" matches, in order of first appearance
prop_tables.loc[:, 'table_name'].unique()

array(['ACCTPROPINS', 'AU_XFR_ENTCHANGESETPROPERTY_V',
       'AU_XFR_ENTITYCHANGESETPROPERTY', 'AU_XFR_ENTITYPROPERTY',
       'PROPUSERFIELD', 'WH_PROP', 'WH_PROP2', 'WH_PROPUSERFIELDS',
       'VIEW_WH_PROPUSERFIELDS', 'ALL_PROPAGATION', 'KU$_MVPROP_VIEW',
       'ALL_METADATA_PROPERTIES', 'KU$_REFPARTTABPROP_VIEW',
       'CDB_DBFS_HS_FIXED_PROPERTIES', 'DATABASE_PROPERTIES',
       'USER_METADATA_PROPERTIES', 'USER_DBFS_HS_FIXED_PROPERTIES',
       'KU$_TABPROP_VIEW', 'KU$_MVLPROP_VIEW', 'KU$_PFHTABPROP_VIEW',
       'DBA_DBFS_HS_FIXED_PROPERTIES', 'KU$_VIEWPROP_VIEW',
       'USER_DBFS_HS_PROPERTIES', 'KU$_MZPROP_VIEW'], dtype=object)

In [7]:
# Column metadata for the ACCTPROPINS table only
prop_tables.loc[prop_tables['table_name'].eq('ACCTPROPINS')]

Unnamed: 0,owner,table_name,column_name,data_type,data_type_mod,data_type_owner,data_length,data_precision,data_scale,nullable,...,char_used,v80_fmt_image,data_upgraded,histogram,default_on_null,identity_column,evaluation_edition,unusable_before,unusable_beginning,collation
88,COCCDM,ACCTPROPINS,PROPNBR,NUMBER,,,22,22.0,0.0,N,...,,NO,YES,HEIGHT BALANCED,NO,NO,,,,
89,COCCDM,ACCTPROPINS,LENDERFUNDEDYN,CHAR,,,1,,,N,...,B,NO,YES,NONE,NO,NO,,,,USING_NLS_COMP
90,COCCDM,ACCTPROPINS,INTRPOLICYNBR,NUMBER,,,22,22.0,0.0,N,...,,NO,YES,NONE,NO,NO,,,,
91,COCCDM,ACCTPROPINS,INACTIVEDATE,DATE,,,7,,,Y,...,,NO,YES,NONE,NO,NO,,,,
92,COCCDM,ACCTPROPINS,ESCROWYN,CHAR,,,1,,,N,...,B,NO,YES,NONE,NO,NO,,,,USING_NLS_COMP
93,COCCDM,ACCTPROPINS,EFFDATE,DATE,,,7,,,Y,...,,NO,YES,NONE,NO,NO,,,,
94,COCCDM,ACCTPROPINS,DATELASTMAINT,DATE,,,7,,,N,...,,NO,YES,NONE,NO,NO,,,,
95,COCCDM,ACCTPROPINS,ACCTNBR,NUMBER,,,22,22.0,0.0,N,...,,NO,YES,NONE,NO,NO,,,,


In [11]:
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'main'", so `main` was not importable
# from the directory added below — confirm where main.py actually lives.
import sys
from pathlib import Path

# Make the directory two levels above the current working directory importable.
sys.path.append(str(Path.cwd().parent.parent))
import main

# Rebinds `data`, which earlier cells use for the retrieve_data() result dict.
data = main.fetch_data()
# Output directory for this report — a hard-coded absolute path on the Z: drive.
file_path = Path(r'Z:\Chad Projects\Monthly Reports\Automated Linda Reports\NonOwnerOcc\Production\output')


ModuleNotFoundError: No module named 'main'

In [12]:
# Show the kernel's working directory; the relative env_admin paths in
# retrieve_data() and the sys.path tweak above are resolved against it.
os.getcwd()

'z:\\Chad Projects\\Monthly Reports\\Automated Linda Reports\\NonOwnerOcc\\Production\\src\\data_connection'