In [1]:
# Trial Balance (Operations) - Multiple Properties per Acctnbr
# Developed by CD
# v2.0.1-prod

In [1]:
# Async Connector
# Developed by CD

from io import StringIO
import time
import numpy as np
import os
from datetime import datetime, timedelta, date
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import text
from typing import List
from collections import defaultdict, Counter
import pandas as pd
from cryptography.fernet import Fernet
from dotenv import load_dotenv
from io import StringIO
from pathlib import Path
import asyncio
import nest_asyncio
import sys
import win32com.client as win32
nest_asyncio.apply()

if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())


def retrieve_data():
    """
    Retrieve data from COCC database
    """
    class DatabaseHandler:
        """
        This class abstracts the connection to the database and allows a clean
        interface for the developer to use.

        This connector can handle async queries

        """
        def __init__(self, tns_admin_path):
            """
            Args:
                tns_admin_path (str): Oracle driver path
                credentials_path_db1 (str): Database 1 credentials path
                credentials_path_db1 (str): Databsae 2 credentials path
            """
            os.environ['TNS_ADMIN'] = tns_admin_path
            
            # Load private key
            key_key_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\key.key'
            with open(key_key_path, "rb") as key_file:
                key = key_file.read()

            cipher = Fernet(key)
            
            # Load encrypted data
            encoded_env_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\.env.enc'
            with open(encoded_env_path, "rb") as encrypted_file:
                encrypted_data = encrypted_file.read()

            decrypted_data = cipher.decrypt(encrypted_data).decode()

            env_file = StringIO(decrypted_data)
            load_dotenv(stream=env_file)

            self.username1 = os.getenv('main_username')
            self.password1 = os.getenv('main_password')
            self.dsn1 = os.getenv('main_dsn')

            self.username2 = os.getenv('datamart_username')
            self.password2 = os.getenv('datamart_password')
            self.dsn2 = os.getenv('datamart_dsn')

            self.connection_string1 = f'oracle+oracledb://{self.username1}:{self.password1}@{self.dsn1}'
            self.connection_string2 = f'oracle+oracledb://{self.username2}:{self.password2}@{self.dsn2}'

            self.engine1 = create_async_engine(self.connection_string1, max_identifier_length=128, echo=False, future=True)
            self.engine1.dialect.hide_parameters = True
            self.engine2 = create_async_engine(self.connection_string2, max_identifier_length=128, echo=False, future=True)
            self.engine1.dialect.hide_parameters = True


        async def query(self, sql_query, engine=1):
            """
            This allows abstraction of the connection and the class
            so the developer can query a single table as a dataframe

            Args:
                sql_query (str): The query to SQL database is passed as a string
                engine (int): This selects the database. There are two engines:
                    1 -> R1625
                    2 -> COCC DataMart

            Returns:
                df: The SQL query is returned as a pandas DataFrame

            Usage:
                df = db_handler.query("SELECT * FROM DB.TABLE", engine=1)

                In this example, db_handler = DatabaseHandler(args)
            """
            if engine == 1:
                selected_engine = self.engine1
            elif engine == 2:
                selected_engine = self.engine2
            else:
                raise ValueError("Engine must be 1 or 2")

            async with selected_engine.connect() as connection:
                result = await connection.execute(sql_query)
                rows = result.fetchall()
                if not rows:
                    return pd.DataFrame()
                df = pd.DataFrame(rows, columns=result.keys())
            return df

        async def close(self):
            if self.engine1:
                await self.engine1.dispose()
            if self.engine2:
                await self.engine2.dispose()


    # Database Connection Configuration
    tns_admin_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Utility\env_admin\tns_admin'
    db_handler = DatabaseHandler(tns_admin_path)

    async def fetch_data(queries):
        try:
            tasks = {query['key']: asyncio.create_task(db_handler.query(query['sql'], query['engine'])) for query in queries}
            results = await asyncio.gather(*tasks.values())
            return {key: df for key, df in zip(tasks.keys(), results)}
        except Exception as e:
            print(f"Error")
            raise
        finally:
            await db_handler.close()

    def run_sql_queries():
        # # lookup table
        # # Engine 1
        # lookup_df = text("""
        # SELECT 
        #     *
        # FROM 
        #     sys.all_tab_columns col
        # """)

        # acctcommon
        # engine 2
        acctcommon = text("""
        SELECT 
            a.ACCTNBR,
            a.MJACCTTYPCD,
            a.CURRMIACCTTYPCD,
            a.PRODUCT,
            a.CURRACCTSTATCD,
            a.NOTEINTRATE,
            a.NOTENEXTRATECHANGEDATE,
            a.NOTERATECHANGECALPERCD,
            a.NOTEOPENAMT,
            a.NOTEBAL,
            a.BOOKBALANCE,
            a.NOTEINTCALCSCHEDNBR,
            a.CALCBALTYPCD,
            a.INTMETHCD,
            a.RATETYPCD,
            a.INTBASE,
            a.DATEMAT,
            a.CONTRACTDATE,
            a.OWNERNAME
        FROM 
            COCCDM.WH_ACCTCOMMON_ME a
        """)

        # Acctloan, engine 2
        acctloan = text("""
        SELECT 
            a.ACCTNBR, 
            a.COBAL,
            a.ESCBAL, 
            a.PURPCD,
            a.FDICCATCD,
            a.DATE1STPMTDUE,
            a.MINRATECHANGEDOWN, 
            a.MAXRATECHANGEDOWN, 
            a.PREPAYCHARGE,
            a.LASTPAYMENTDATE,
            a.NOTEACCRUEDINT
        FROM 
            COCCDM.WH_ACCTLOAN_ME a
        """)

        loans = text("""
        SELECT 
            a.ACCTNBR, 
            a.AVAILBALAMT,
            a.INTPAIDTODATE,
            a.FDICCATDESC,
            a.LOANIDX
        FROM 
            COCCDM.WH_LOANS_ME a
        """)

        prop = text("""
        SELECT
            a.ACCTNBR,
            a.PROPNBR,
            a.APRSVALUEAMT,
            a.APRSDATE,
            a.PROPADDR1,
            a.PROPADDR2,
            a.PROPADDR3,
            a.PROPCITY,
            a.PROPSTATE,
            a.PROPZIP
        FROM
            OSIBANK.WH_PROP a
        """)

        prop2 = text("""
        SELECT
            a.ACCTNBR,
            a.PROPTYPDESC,
            a.PROPNBR,
            a.PROPVALUE,
            a.PROPTYPCD,
            a.PROPDESC
        FROM
            OSIBANK.WH_PROP2 a
        """)

        queries = [
            {'key':'acctcommon', 'sql':acctcommon, 'engine':2},
            {'key':'acctloan', 'sql': acctloan, 'engine':2},
            {'key':'loans', 'sql': loans, 'engine':2},
            {'key':'prop', 'sql': prop, 'engine':1},
            {'key':'prop2', 'sql': prop2, 'engine':1},
        ]

        async def run_queries():
            return await fetch_data(queries)
        
        loop = asyncio.get_event_loop()
        if loop.is_running():
            return loop.run_until_complete(run_queries())
        else:
            return asyncio.run(run_queries())
        
    data = run_sql_queries()
    
    return data

In [2]:
data = retrieve_data()

In [3]:
# Extract data
acctcommon = data['acctcommon'].copy()
acctloan = data['acctloan'].copy()
loans = data['loans'].copy()
prop = data['prop'].copy()
prop2 = data['prop2'].copy()

In [4]:
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163914 entries, 0 to 163913
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype         
---  ------                  --------------   -----         
 0   acctnbr                 163914 non-null  int64         
 1   mjaccttypcd             163914 non-null  object        
 2   currmiaccttypcd         163914 non-null  object        
 3   product                 163914 non-null  object        
 4   curracctstatcd          163914 non-null  object        
 5   noteintrate             161015 non-null  object        
 6   notenextratechangedate  5586 non-null    datetime64[ns]
 7   noteratechangecalpercd  52703 non-null   object        
 8   noteopenamt             155021 non-null  object        
 9   notebal                 163914 non-null  object        
 10  bookbalance             163914 non-null  object        
 11  noteintcalcschednbr     161015 non-null  float64       
 12  calcbaltypcd            140665

In [5]:
def filter_acctcommon(df):
    df = df.loc[df['mjaccttypcd'] == 'CML'].copy()
    df = df.loc[df['curracctstatcd'].isin(['ACT','NPFM'])].copy()
    return df

In [6]:
acctcommon = filter_acctcommon(acctcommon)

In [7]:
acctcommon.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3316 entries, 25 to 163904
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   acctnbr                 3316 non-null   int64         
 1   mjaccttypcd             3316 non-null   object        
 2   currmiaccttypcd         3316 non-null   object        
 3   product                 3316 non-null   object        
 4   curracctstatcd          3316 non-null   object        
 5   noteintrate             3316 non-null   object        
 6   notenextratechangedate  1149 non-null   datetime64[ns]
 7   noteratechangecalpercd  1449 non-null   object        
 8   noteopenamt             2574 non-null   object        
 9   notebal                 3316 non-null   object        
 10  bookbalance             3316 non-null   object        
 11  noteintcalcschednbr     3316 non-null   float64       
 12  calcbaltypcd            3316 non-null   object    

In [8]:
assert acctcommon['acctnbr'].is_unique, "Duplicates found"

In [9]:
assert acctloan['acctnbr'].is_unique, "Duplicates found"

In [10]:
assert loans['acctnbr'].is_unique, "Duplicates found"

In [11]:
def merging_loan_data(acctcommon, acctloan, loans):
    merged_df = pd.merge(acctcommon, acctloan, how='left', on='acctnbr')
    merged_df = pd.merge(merged_df, loans, how='left', on='acctnbr')
    return merged_df

In [12]:
merged_df = merging_loan_data(acctcommon, acctloan, loans)

In [13]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3316 entries, 0 to 3315
Data columns (total 33 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   acctnbr                 3316 non-null   int64         
 1   mjaccttypcd             3316 non-null   object        
 2   currmiaccttypcd         3316 non-null   object        
 3   product                 3316 non-null   object        
 4   curracctstatcd          3316 non-null   object        
 5   noteintrate             3316 non-null   object        
 6   notenextratechangedate  1149 non-null   datetime64[ns]
 7   noteratechangecalpercd  1449 non-null   object        
 8   noteopenamt             2574 non-null   object        
 9   notebal                 3316 non-null   object        
 10  bookbalance             3316 non-null   object        
 11  noteintcalcschednbr     3316 non-null   float64       
 12  calcbaltypcd            3316 non-null   object  

In [14]:
prop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82740 entries, 0 to 82739
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   acctnbr       82740 non-null  int64         
 1   propnbr       82740 non-null  int64         
 2   aprsvalueamt  11529 non-null  object        
 3   aprsdate      11529 non-null  datetime64[ns]
 4   propaddr1     12451 non-null  object        
 5   propaddr2     10 non-null     object        
 6   propaddr3     0 non-null      object        
 7   propcity      12452 non-null  object        
 8   propstate     12450 non-null  object        
 9   propzip       12423 non-null  object        
dtypes: datetime64[ns](1), int64(2), object(7)
memory usage: 6.3+ MB


In [15]:
prop2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82970 entries, 0 to 82969
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   acctnbr      82970 non-null  int64  
 1   proptypdesc  82970 non-null  object 
 2   propnbr      82970 non-null  int64  
 3   propvalue    5636 non-null   float64
 4   proptypcd    82970 non-null  object 
 5   propdesc     81963 non-null  object 
dtypes: float64(1), int64(2), object(3)
memory usage: 3.8+ MB


In [16]:
# assert prop['acctnbr'].is_unique, "Duplicates found"
# assert prop2['acctnbr'].is_unique, "Duplicates found"

In [17]:
def merging_core_with_prop(df, prop, prop2):
    merged_df = pd.merge(df, prop, how='left', on='acctnbr')
    merged_df = pd.merge(merged_df, prop2, how='left', on='acctnbr')
    return merged_df

In [18]:
merged_df = merging_core_with_prop(merged_df, prop, prop2)

In [19]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11081 entries, 0 to 11080
Data columns (total 47 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   acctnbr                 11081 non-null  int64         
 1   mjaccttypcd             11081 non-null  object        
 2   currmiaccttypcd         11081 non-null  object        
 3   product                 11081 non-null  object        
 4   curracctstatcd          11081 non-null  object        
 5   noteintrate             11081 non-null  object        
 6   notenextratechangedate  6212 non-null   datetime64[ns]
 7   noteratechangecalpercd  7616 non-null   object        
 8   noteopenamt             10078 non-null  object        
 9   notebal                 11081 non-null  object        
 10  bookbalance             11081 non-null  object        
 11  noteintcalcschednbr     11081 non-null  float64       
 12  calcbaltypcd            11081 non-null  object

In [20]:
merged_df['prepayment_penalty'] = np.where(merged_df['prepaycharge'] > 0, 'Y', 'N')

In [21]:
from dateutil.relativedelta import relativedelta

In [22]:
current_date = (date.today() + relativedelta(day=1) - relativedelta(days=1)).strftime('%Y%m%d')


In [23]:
# Output to excel
current_date = (date.today() + relativedelta(day=1) - relativedelta(days=1)).strftime('%Y%m%d') # Most recent ME
file_path = r'\\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Monthly Reports\Trial Balance Ops\Production\Output'
file_name = f'CML_Trial_Balance_Ops_MultipleProperties{current_date}.xlsx'
full_path = os.path.join(file_path, file_name)
merged_df.to_excel(full_path, sheet_name='Sheet1', engine='openpyxl', index=False)

try:
    excel = win32.Dispatch("Excel.Application")
    excel.Visible = False
    workbook = excel.Workbooks.Open(full_path)
    sheet = workbook.Worksheets("Sheet1")

    sheet.Columns.AutoFit()

    # Bold top row
    top_row = sheet.Rows(1)
    top_row.Font.Bold = True

    # Add bottom border to header row
    bottom_border = top_row.Borders(9)
    bottom_border.LineStyle = 1
    bottom_border.Weight = 2

    # date_columns = ["I","L","O","R","AA"]

    # for col in date_columns:
    #     col_index = column_to_index(col)
    #     sheet.Columns(col_index).NumberFormat = "mm/dd/yyyy"

    # Freeze top row
    sheet.Application.ActiveWindow.SplitRow = 1
    sheet.Application.ActiveWindow.FreezePanes = True

    workbook.Save()
    workbook.Close()

    print(f"Excel file saved with autofit at {full_path}")
finally:
    try:
        if 'workbook' in locals() and workbook is not None:
            workbook.Close(SaveChanges=False)
    except:
        pass
    try:
        if 'excel' in locals():
            excel.Quit()
    except:
        pass
    print("Excel process complete")

# Email
recipients = [
    # "chad.doorley@bcsbmail.com"
    "kelly.abernathy@bcsbmail.com",
    "Zachary.Cabral@bcsbmail.com"
]
bcc_recipients = [
    "chad.doorley@bcsbmail.com"
]
outlook = win32.Dispatch("Outlook.Application")
message = outlook.CreateItem(0)
# message.Display()
message.To = ";".join(recipients)
message.BCC = ";".join(bcc_recipients)
message.Subject = f"CML Trial Balance Multiple Properties - {current_date}"
message.Body = "Hi, \n\nAttached is the CML Trial Balance with Multiple Properties per Acctnbr. Please let me know if you have any questions."
message.Attachments.Add(str(full_path))
message.Send()
print("Email sent!")




Excel file saved with autofit at \\00-da1\Home\Share\Data & Analytics Initiatives\Project Management\Chad Projects\Monthly Reports\Trial Balance Ops\Production\Output\CML_Trial_Balance_Ops_MultipleProperties20250131.xlsx
Excel process complete
Email sent!
