In [1]:

"""
Main Entry Point
"""
import shutil
import re
from pathlib import Path
from typing import List
from datetime import datetime

from lxml import html
import pandas as pd # type: ignore
import numpy as np

import src.fetch_cocc_data # type: ignore
import src.ingest # type: ignore
import src.output_to_excel_multiple_sheets # type: ignore
from src._version import __version__
import src.rel_entity_officer


# def main(production_flag: bool=False):
#     if production_flag:
#         BASE_PATH = Path(r'\\00-DA1\Home\Share\Line of Business_Shared Services')
#         assert "prod" in __version__, (f"Cannot run in production mode without 'prod' in the __version__")
#     else:
#         BASE_PATH = Path('.')

# %%
# Ingest the report workbooks. `files` is later passed to merge_with_mode, which
# iterates it as a mapping of report key -> DataFrame.
files = src.ingest.process_xls_files()

# %%
# Fetch the core extracts and keep an independent copy of the account-level
# table only, so `cocc_data` is a DataFrame from the moment it is first bound.
cocc_data = src.fetch_cocc_data.fetch_data()['wh_acctcommon'].copy()

# %%
cocc_data

# %%
# Aggregation helper: most frequent non-null value of a Series.
def get_mode(series):
    """
    Return the most frequent non-null value in ``series``.

    The previous implementation de-duplicated the values before calling
    ``mode()``. After de-duplication every value occurs exactly once, so the
    "mode" was simply the smallest distinct value and the frequency information
    was lost. The mode is now computed on the full (non-null) data.

    Args:
        series (pd.Series): Values to summarise (e.g. one groupby group).

    Returns:
        The most frequent non-null value, or ``None`` when the series is empty
        or contains only nulls. When several values tie for the highest count,
        the first one in the order produced by ``Series.mode()`` (ascending
        sort) is returned, which keeps the result deterministic.
    """
    series_clean = series.dropna()
    if series_clean.empty:
        return None

    # Series.mode() returns every value tied for the top count; take the first.
    mode_result = series_clean.mode()
    return mode_result.iloc[0] if len(mode_result) > 0 else None
# Collapse the account table to one row per owner, summarising each officer
# column with get_mode, then expose the columns under their report-facing names.
cocc_data_grouped = (
    cocc_data
    .groupby('ownersortname')
    .agg({'loanofficer': get_mode, 'acctofficer': get_mode})
    .reset_index()
    .rename(columns={
        'ownersortname': 'customer_name',
        'loanofficer': 'Loan Officer',
        'acctofficer': 'Deposit Officer',
    })
    .copy()
)
cocc_data_grouped

# %%
# Officer lookup derived from related entities (account roles), used downstream
# as a fallback when the direct owner lookup has no loan officer.
rel_entity_grouped = src.rel_entity_officer.create_officer_df()

Processing: 4i2k3xxawn3kktdqxcw4zyuk-7cfd406d357144489c89546b7daa2613.xls
Report title: 'ticklers 1 or more days past due'
Extracted 1672 records for 1179 customers
Mapped to: ticklers_past_due
Moved to archive: 4i2k3xxawn3kktdqxcw4zyuk-7cfd406d357144489c89546b7daa2613.xls
Processing: 4i2k3xxawn3kktdqxcw4zyuk-b9b20685f1c24cf9a9959db3057f4c2a.xls
Report title: 'covenants 1 or more days past due'
Extracted 375 records for 253 customers
Mapped to: covenants_past_due
Moved to archive: 4i2k3xxawn3kktdqxcw4zyuk-b9b20685f1c24cf9a9959db3057f4c2a.xls
Processing: 4i2k3xxawn3kktdqxcw4zyuk-e5c6ed541f994b1a93b14de5deca14ec.xls
Report title: 'covenants 1 or more days in default'
Extracted 15 records for 14 customers
Mapped to: covenants_in_default
Moved to archive: 4i2k3xxawn3kktdqxcw4zyuk-e5c6ed541f994b1a93b14de5deca14ec.xls
Processed 3 dataframes: ['ticklers_past_due', 'covenants_past_due', 'covenants_in_default']


In [27]:
# %%
import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
import cdutils.deduplication # type: ignore
import cdutils.input_cleansing # type: ignore
import numpy as np
import pandas as pd
import re


def fetch_data():
    """
    Run the four warehouse extracts this module needs and return the results.

    Queries issued:
        * ``wh_acctcommon`` - owner name, loan officer, account officer and
          account number for accounts matching the status / major-type filters
          in the SQL below, excluding minor type 'CI07'.
        * ``wh_allroles``   - all columns, restricted to the listed role codes.
        * ``wh_org``        - all columns, unfiltered.
        * ``wh_pers``       - all columns, unfiltered.

    Returns:
        Whatever ``cdutils.database.connect.retrieve_data`` returns for the
        query list. Callers in this file index it by the ``key`` values above
        (e.g. ``data['wh_acctcommon']``).
        NOTE(review): the meaning of ``'engine': 1`` is defined by cdutils and
        is not visible here - confirm it selects the intended connection.
    """
    acctcommon_sql = text("""
    SELECT
        a.OWNERSORTNAME,
        a.LOANOFFICER,
        a.ACCTOFFICER,
        a.ACCTNBR
    FROM
        OSIBANK.WH_ACCTCOMMON a
    WHERE
        (a.CURRACCTSTATCD IN ('ACT','NPFM','DORM'))
        AND (a.MJACCTTYPCD IN ('CML','MLN','CK','SAV','TD'))
        AND (a.CURRMIACCTTYPCD != 'CI07')
    """)

    allroles_sql = text("""
    SELECT
        *
    FROM
        OSIBANK.WH_ALLROLES a
    WHERE
        a.ACCTROLECD in ('OWN', 'GUAR', 'LNCO', 'Tax Owner','Tax Signator','SIGN')
    """)

    org_sql = text("""
    SELECT
        *
    FROM
        OSIBANK.WH_ORG a
    """)

    pers_sql = text("""
    SELECT
        *
    FROM
        OSIBANK.WH_PERS a
    """)

    # Same request order as before: acctcommon, allroles, org, pers.
    named_statements = (
        ('wh_acctcommon', acctcommon_sql),
        ('wh_allroles', allroles_sql),
        ('wh_org', org_sql),
        ('wh_pers', pers_sql),
    )
    queries = [
        {'key': key, 'sql': statement, 'engine': 1}
        for key, statement in named_statements
    ]

    return cdutils.database.connect.retrieve_data(queries)

# Whole-word markers that identify an organisation rather than a person.
# Matched against complete words only: substring matching treated person names
# such as "SCOTT", "NICOLE" or "VINCENT" as businesses (they contain "CO"/"INC").
_BUSINESS_INDICATORS = frozenset({
    'LLC', 'LLP', 'INC', 'INCORPORATED', 'CORP', 'CORPORATION', 'LTD', 'LIMITED',
    'CO', 'COMPANY', 'TRUST', 'ESTATE', 'FOUNDATION', 'FUND',
    'REALTY', 'CONSTRUCTION', 'SERVICES', 'GROUP', 'ASSOCIATES',
    'ENTERPRISES', 'SOLUTIONS', 'SYSTEMS', 'TECHNOLOGIES',
    'HOLDINGS', 'PROPERTIES', 'MANAGEMENT', 'CONSULTING',
    'PARTNERSHIP', 'PARTNERS', 'INVESTMENTS', 'CAPITAL',
    'VENTURES', 'DEVELOPMENT', 'BUILDERS', 'CONTRACTORS',
    'MORTGAGE', 'FINANCIAL', 'INSURANCE', 'AGENCY', 'FIRM',
})

# Generational suffixes removed from the end of a name (compared upper-cased).
_NAME_SUFFIXES = ('JR', 'JR.', 'SR', 'SR.', 'II', 'III', 'IV', 'V')


def clean_customer_name(name):
    """
    Clean a customer name by removing a trailing suffix and a middle initial.

    Examples:
        "Stephen P. Blaze" -> "Stephen Blaze"
        "Mary Jane Smith JR" -> "Mary Jane Smith"
        "Robert A. Johnson JR." -> "Robert Johnson"
        "J.P. Morgan" -> "J.P. Morgan" (not a 3-part name, left as is)
        "SCOTT A. COLE" -> "SCOTT COLE" ("CO" inside a word is not a business marker)

    Args:
        name: The customer name to clean. Non-string or blank values are
            returned unchanged.

    Returns:
        The cleaned name. Names containing a business indicator as a whole word
        keep their middle part.

    NOTE(review): as in the original code, the suffix is stripped before the
    business check, so an organisation ending in e.g. " II" also loses that
    suffix - confirm this is what the downstream join expects.
    """
    if not isinstance(name, str) or not name.strip():
        return name

    # Collapse runs of whitespace so splitting and suffix checks are reliable.
    name = ' '.join(name.split())

    # Strip at most one trailing suffix (case-insensitive).
    name_upper = name.upper()
    for suffix in _NAME_SUFFIXES:
        if name_upper.endswith(' ' + suffix):
            name = name[:-len(' ' + suffix)].strip()
            break

    # Business check on whole words; punctuation such as "," or "." is ignored.
    words = set(re.findall(r'[A-Z0-9]+', name.upper()))
    if not words.isdisjoint(_BUSINESS_INDICATORS):
        return name

    # Only "First M Last" / "First M. Last" shaped names lose the middle part.
    parts = name.split()
    if len(parts) == 3:
        first_name, middle_part, last_name = parts
        looks_like_initial = (
            len(middle_part.replace('.', '')) == 1 and middle_part[0].isupper()
        )
        looks_like_person = (
            len(first_name) > 1 and first_name[0].isupper()
            and len(last_name) > 1 and last_name[0].isupper()
        )
        if looks_like_initial and looks_like_person:
            return f"{first_name} {last_name}"

    return name


def normalize_customer_names(df, customer_column='customer_name'):
    """
    Return a copy of ``df`` with ``customer_column`` passed through
    ``clean_customer_name``.

    Args:
        df (pd.DataFrame): Frame containing the customer names.
        customer_column (str): Name of the column to normalise.

    Returns:
        pd.DataFrame: A new frame with normalised names. ``None`` or an empty
        frame is returned as-is.

    Raises:
        ValueError: If ``customer_column`` is not a column of ``df``.
    """
    if df is None or df.empty:
        return df

    if customer_column not in df.columns:
        raise ValueError(f"Column '{customer_column}' not found in DataFrame")

    df_normalized = df.copy()
    df_normalized[customer_column] = df_normalized[customer_column].apply(clean_customer_name)
    return df_normalized


def _most_common(series):
    """
    Return the most frequent non-null value of ``series`` (None if all null).

    The mode is taken over the raw values. De-duplicating first (as the old
    helper did) makes every count 1 and degenerates to "smallest value".
    Ties resolve to the first entry of ``Series.mode()`` (ascending order).
    """
    non_null = series.dropna()
    if non_null.empty:
        return None
    return non_null.mode().iloc[0]


def create_officer_df():
    """
    Build a customer -> loan officer lookup from account roles.

    Steps:
        1. Fetch the extracts via ``fetch_data`` (roles are already filtered to
           the relevant role codes in SQL).
        2. Coerce ``acctnbr`` to string on both tables and left-join roles to
           account data.
        3. De-duplicate organisations / persons and attach their names.
        4. Derive ``customer_name`` (person name, falling back to organisation
           name), drop rows with no loan officer, and normalise the names.
        5. Keep the most frequent loan officer per customer.

    Returns:
        pd.DataFrame: Columns ``customer_name`` and ``Loan Officer_related``,
        one row per customer name that has a loan officer.
    """
    data = fetch_data()

    wh_acctcommon = data['wh_acctcommon'].copy()
    wh_allroles = data['wh_allroles'].copy()
    wh_org = data['wh_org'].copy()
    wh_pers = data['wh_pers'].copy()

    # Join keys must share a dtype on both sides of the merge.
    wh_allroles = cdutils.input_cleansing.enforce_schema(wh_allroles, {'acctnbr': 'str'})
    wh_acctcommon = cdutils.input_cleansing.enforce_schema(wh_acctcommon, {'acctnbr': 'str'})

    merged_df = pd.merge(wh_allroles, wh_acctcommon, on='acctnbr', how='left')

    dedupe_list = [
        {'df': wh_org, 'field': 'orgnbr'},
        {'df': wh_pers, 'field': 'persnbr'},
    ]
    wh_org_clean, wh_pers_clean = cdutils.deduplication.dedupe(dedupe_list)

    # A duplicated key here would fan out rows in the name joins below.
    assert wh_org_clean['orgnbr'].is_unique, "Fail"
    assert wh_pers_clean['persnbr'].is_unique, "Fail"

    wh_org_clean = wh_org_clean[['orgnbr', 'orgname']].copy()
    wh_pers_clean = wh_pers_clean[['persnbr', 'persname']].copy()

    merged_df = pd.merge(merged_df, wh_org_clean, on='orgnbr', how='left')
    merged_df = pd.merge(merged_df, wh_pers_clean, on='persnbr', how='left')

    # Prefer the person name; fall back to the organisation name.
    merged_df['customer_name'] = np.where(
        merged_df['persname'].isnull(), merged_df['orgname'], merged_df['persname']
    )

    # List form of `subset` is accepted by every pandas version.
    merged_df = merged_df.dropna(subset=['loanofficer'])

    merged_df = normalize_customer_names(merged_df)

    rel_entity_data_grouped = (
        merged_df
        .groupby('customer_name')
        .agg({'loanofficer': _most_common})
        .reset_index()
        .rename(columns={'loanofficer': 'Loan Officer_related'})
    )

    rel_entity_data_grouped = rel_entity_data_grouped.dropna(subset=['Loan Officer_related']).copy()

    return rel_entity_data_grouped

    # %%






In [28]:
# Rebuild the related-entity officer lookup with the create_officer_df defined
# in this notebook. This rebinds the `rel_entity_grouped` that was assigned
# earlier from src.rel_entity_officer.create_officer_df().
rel_entity_grouped = create_officer_df()

In [29]:


# %%
def merge_with_mode(df_dict, cocc_data_grouped, rel_entity_grouped):
    """
    Attach officer assignments to every report frame in ``df_dict``.

    Each frame is left-joined on ``customer_name`` to both lookups. The final
    ``Loan Officer`` column takes the value from ``cocc_data_grouped`` and falls
    back to ``Loan Officer_related`` when that is null. The three date columns
    are converted with ``pd.to_datetime`` and rows are ordered by
    ``period_date`` ascending.

    Per the original note, officer data only applies to active / dormant /
    non-performing accounts; that filtering happens upstream, not here.

    Args:
        df_dict (dict): Report key -> DataFrame. Each frame must contain the
            report columns selected below.
        cocc_data_grouped (pd.DataFrame): Lookup with ``customer_name``,
            ``Loan Officer`` and ``Deposit Officer``.
        rel_entity_grouped (pd.DataFrame): Lookup with ``customer_name`` and
            ``Loan Officer_related``.

    Returns:
        dict: Same keys as ``df_dict``; values are new frames with the fixed
        column layout below.
    """
    date_columns = ('period_date', 'due_date', 'report_date')
    selected_columns = [
        'customer_name',
        'Loan Officer_new',
        'Deposit Officer',
        'item_name',
        'required_value',
        'actual_value',
        'period_date',
        'due_date',
        'days_past_due',
        'interval',
        'comments',
        'report_date',
    ]

    def _with_officers(report_df):
        # Bring in both officer lookups; unmatched customers keep null officers.
        combined = report_df.merge(cocc_data_grouped, on='customer_name', how='left')
        combined = combined.merge(rel_entity_grouped, on='customer_name', how='left')

        for column in date_columns:
            combined[column] = pd.to_datetime(combined[column])

        combined = combined.sort_values(by='period_date', ascending=True)

        # Direct owner officer wins; related-entity officer fills the gaps.
        direct = combined['Loan Officer']
        combined['Loan Officer_new'] = np.where(
            direct.isnull(), combined['Loan Officer_related'], direct
        )

        trimmed = combined[selected_columns].copy()
        return trimmed.rename(columns={'Loan Officer_new': 'Loan Officer'}).copy()

    return {report_key: _with_officers(report_df) for report_key, report_df in df_dict.items()}


# %%
# Attach officer assignments to every ingested report; merge_with_mode returns
# a dict with the same keys as `files`.
cleaned_dict = merge_with_mode(files, cocc_data_grouped, rel_entity_grouped)

# %%
# Show which report keys were produced.
cleaned_dict.keys()

dict_keys(['ticklers_past_due', 'covenants_past_due', 'covenants_in_default'])

In [30]:


# %%
# Independent copy of the 'ticklers_past_due' report, so later edits to it do
# not touch the frame held in cleaned_dict; displayed for inspection.
ticklers_past_due = cleaned_dict['ticklers_past_due'].copy()
ticklers_past_due



Unnamed: 0,customer_name,Loan Officer,Deposit Officer,item_name,required_value,actual_value,period_date,due_date,days_past_due,interval,comments,report_date
1320,ROBERT ZAMMITO,ROGER A. CABRAL,,Personal Financial Statement,,,2021-12-31,2022-04-30,1170,Annually,,2025-07-13
1199,PAUL ZAMMITO,ROGER A. CABRAL,,Personal Financial Statement,,,2021-12-31,2022-04-30,1170,Annually,,2025-07-13
237,"BLAZING REPAIR SHOP, LLC",PETER ST JEAN,,Corporate Tax Return,,,2021-12-31,2022-04-30,1170,Annually,,2025-07-13
548,ELIZABETH NEVEUX,ROGER A. CABRAL,,Personal Financial Statement,,,2021-12-31,2022-04-30,1170,Annually,,2025-07-13
376,CHRISTOPHER ZAMMITO,ROGER A. CABRAL,,Personal Financial Statement,,,2021-12-31,2022-04-30,1170,Annually,,2025-07-13
...,...,...,...,...,...,...,...,...,...,...,...,...
660,GIUSEPPE PAGNANI,ANDREW J. OMER,,Personal Financial Statement,,,2025-06-04,2025-07-05,8,Annually,,2025-07-13
895,KENNETH FOLEY,MARK A. BORKMAN,,Personal Financial Statement,,,2025-06-12,2025-07-12,1,Annually,,2025-07-13
66,490 WINTHROP STREET LLC,BRANDON CANNATA,BRANDON CANNATA,Site Inspection,,,2025-06-27,2025-06-27,16,Annually,,2025-07-13
670,"GREENSCAPE LAND DESIGN, INC",,,Borrowing Base Package,,,2025-06-30,2025-07-10,3,Monthly,,2025-07-13


In [31]:
# Inspect the related-entity lookup (customer_name -> 'Loan Officer_related').
rel_entity_grouped

Unnamed: 0,customer_name,Loan Officer_related
0,02908 HOLDCO LLC,JEFFREY M. VIALL
1,103 HIGHLAND LLC,SBLC LOAN OFFICER
2,"105 SOUTH WASHINGTON, LLC",PETER ST JEAN
3,1095 NEWMAN AVE LLC,SBLC LOAN OFFICER
4,11 NOVEMBER 1918 HOLDINGS LLC,SBLC LOAN OFFICER
...,...,...
4618,ZEITERION THEATRE INC,WILLITTS S. MENDONCA
4619,ZENITH VENTURES LLC,JOSHUA A. CAMARA
4620,ZEYCO CONSTRUCTION LLC,EBL PROGRAM ADMIN
4621,ZI ZHANG,KEVIN M. MCCARTHY


In [32]:
# %%
# Independent copy of the 'covenants_past_due' report; displayed for inspection.
covenants_past_due = cleaned_dict['covenants_past_due'].copy()
covenants_past_due

Unnamed: 0,customer_name,Loan Officer,Deposit Officer,item_name,required_value,actual_value,period_date,due_date,days_past_due,interval,comments,report_date
100,CENTREDALE REVIVAL LLC,PETER ST JEAN,PETER ST JEAN,Investment Property NOI/TDS,1.25,,2022-12-31,2023-02-14,880,Annually,,2025-07-13
265,PREMIER CONSTRUCTION INC,JOAN M. MEDEIROS,JOAN M. MEDEIROS,LOC 30 Day Clean-up or Clean Down,0.00,,2022-12-31,2023-02-14,880,Annually,,2025-07-13
179,HFC REALTY LLC,PETER ST JEAN,,C&I OCF/TDS combined with Realty,1.20,,2022-12-31,2023-09-15,667,Annually,,2025-07-13
304,SR MANAGEMENT GROUP LLC,PETER ST JEAN,PETER ST JEAN,Investment Property NOI/TDS,1.25,,2022-12-31,2023-02-14,880,Annually,,2025-07-13
203,KM DOVER LLC,ROGER A. CABRAL,ROGER A. CABRAL,Maximum LTV of XXX%,70.00,,2023-12-31,2024-04-29,440,Annually,,2025-07-13
...,...,...,...,...,...,...,...,...,...,...,...,...
123,DARLING DEVELOPMENT CORPORATION,ROGER A. CABRAL,FRANK P. WILHELM,C&amp;I OCF/TDS Combined,1.00,,2024-12-31,2025-04-30,74,Annually,Borrower shall maintain a consolidated ratio o...,2025-07-13
132,DICKSON MEADOW CONDOMINIUM TRUST,HOWARD HIMMEL,HOWARD HIMMEL,10% reserve in budget,10,,2024-12-31,2025-04-30,74,Annually,,2025-07-13
374,ZAMMITO AUTOMOTIVE REAL ESTATE LLC,ROGER A. CABRAL,ROGER A. CABRAL,Investment Property NOI/TDS,1.2,,2024-12-31,2025-04-30,74,Annually,,2025-07-13
286,ROLANDS TIRE SERVICE INC,JOSHUA A. CAMARA,JOSHUA A. CAMARA,LOC 30 Day Clean-up or Clean Down,0.00,,2025-02-28,2025-06-28,15,Annually,,2025-07-13


In [33]:
# %%

# Independent copy of the 'covenants_in_default' report; displayed for inspection.
covenants_in_default = cleaned_dict['covenants_in_default'].copy()
covenants_in_default



Unnamed: 0,customer_name,Loan Officer,Deposit Officer,item_name,required_value,actual_value,period_date,due_date,days_past_due,interval,comments,report_date
5,EAST COAST SHED INC,SBLC LOAN OFFICER,GEORGE J. MENDROS,LOC 30 Day Clean-up or Clean Down,30.0,27.0,2020-12-31,2021-04-30,,Annually,,2025-07-13
14,THE PAWS GROUP LLC,SBLC LOAN OFFICER,SBLC LOAN OFFICER,Post-Distribution DSC,1.25,0.38,2021-12-31,2022-04-30,,Annually,"EBITDA (after taxes, distributions, and unfina...",2025-07-13
3,CENTREDALE REVIVAL LLC,PETER ST JEAN,PETER ST JEAN,C&I OCF/TDS combined with Realty,1.25,-0.55,2022-12-31,2023-04-30,,Annually,,2025-07-13
4,CURRY WATERPROOFING & MASONRY RESTORATION INC,SBLC LOAN OFFICER,FRANK P. WILHELM,LOC 30 Day Clean-up or Clean Down,0.0,33552.44,2022-12-31,2023-02-14,,Annually,,2025-07-13
6,HA HOSPITALITY GROUP LLC,PETER ST JEAN,PETER ST JEAN,C&I OCF/TDS,1.25,0.97,2022-12-31,2023-09-20,,Annually,,2025-07-13
8,MAG IRRIGATION INC,,,LOC 30 Day Clean-up or Clean Down,30.0,0.0,2022-12-31,2023-05-01,,Annually,,2025-07-13
10,SPECTRUM THERMAL PROCESSING LLC,SBLC LOAN OFFICER,LAURA A. STACK,C&I OCF/TDS,1.2,0.62,2022-12-31,2023-02-14,,Annually,,2025-07-13
11,SR MANAGEMENT GROUP LLC,PETER ST JEAN,PETER ST JEAN,C&I OCF/TDS combined with Realty,1.25,-0.55,2022-12-31,2023-04-30,,Annually,,2025-07-13
12,"THE BATTERY CONNECTION, INC",,,LOC 30 Day Clean-up or Clean Down,30.0,0.0,2022-12-31,2023-03-31,,Annually,,2025-07-13
7,JNK REALTY LLC,ROGER A. CABRAL,,Investment Property NOI/TDS,1.2,0.87,2023-12-31,2024-04-29,,Annually,,2025-07-13


In [None]:
# # %%
# ticklers_coming_due_365 = cleaned_dict['ticklers_coming_due_365'].copy()
# ticklers_coming_due_365



# # %%
# covenants_coming_due_365 = cleaned_dict['covenants_coming_due_365'].copy()
# covenants_coming_due_365

# %%


# COVENANT_OUTPUT_PATH = BASE_PATH / Path('./output/CT_Covenant_Tracking.xlsx')
# with pd.ExcelWriter(COVENANT_OUTPUT_PATH, engine="openpyxl") as writer:
#     # covenants_coming_due_365.to_excel(writer, sheet_name='Coming Due', index=False)
#     covenants_past_due.to_excel(writer, sheet_name='Past Due', index=False)
#     covenants_in_default.to_excel(writer, sheet_name='In Default', index=False)

# # Format excel
# src.output_to_excel_multiple_sheets.format_excel_file(COVENANT_OUTPUT_PATH)

# TICKLER_OUTPUT_PATH = BASE_PATH / Path('./output/CT_Tickler_Tracking.xlsx')
# with pd.ExcelWriter(TICKLER_OUTPUT_PATH, engine="openpyxl") as writer:
#     # ticklers_coming_due_365.to_excel(writer, sheet_name='Coming Due', index=False)
#     ticklers_past_due.to_excel(writer, sheet_name='Past Due', index=False)

# # Format excel
# src.output_to_excel_multiple_sheets.format_excel_file(TICKLER_OUTPUT_PATH)

# Distribution
# recipients = [
#     # "chad.doorley@bcsbmail.com",
# ]
# bcc_recipients = [
#     "chad.doorley@bcsbmail.com",
#     "businessintelligence@bcsbmail.com"
# ]
# subject = f"File Name" 
# body = "Hi, \n\nAttached is your requested report. If you have any questions, please reach out to BusinessIntelligence@bcsbmail.com \n\nThanks!"
# attachment_paths = [OUTPUT_PATH]
# cdutils.distribution.email_out(recipients, bcc_recipients, subject, body, attachment_paths)






# if __name__ == '__main__':
# print(f"Starting [{__version__}]")
# # main(production_flag=True)
# main()
# print("Complete!")

