In [1]:
import cdutils.database.connect

In [2]:
import pandas as pd

In [3]:
"""
Fetching data module. Aim is import all necessary fields up front, but if needed, you can define another function to be called here.

Usage:
    import src.cdutils.database
"""

from sqlalchemy import text # type: ignore

def fetch_data():
     # acctcommon
    # engine 1

    allroles = text("""
    SELECT 
        *
    FROM 
        OSIBANK.WH_ALLROLES a
    WHERE
        a.ACCTROLECD IN ('GUAR')
    """)

    pers = text("""
    SELECT 
        a.PERSNBR,
        a.FIRSTNAME,
        a.LASTNAME,
        a.DATEBIRTH,
        a.DEATHNOTIFICATIONDATE
    FROM 
        OSIBANK.PERS a
    """)

    viewperstaxid = text("""
    SELECT 
        *
    FROM 
        OSIBANK.VIEWPERSTAXID
    """)

    wh_addr = text("""
    SELECT
        *
    FROM
        OSIBANK.WH_ADDR
    """)

    persaddruse = text("""
    SELECT
        *
    FROM
        OSIBANK.PERSADDRUSE
    """)    

    queries = [
        {'key':'allroles', 'sql':allroles, 'engine':1},
        {'key':'pers', 'sql':pers, 'engine':1},
        {'key':'viewperstaxid', 'sql':viewperstaxid, 'engine':1},
        {'key':'wh_addr', 'sql':wh_addr, 'engine':1},
        {'key':'persaddruse', 'sql':persaddruse, 'engine':1},
    ]

    data = cdutils.database.connect.retrieve_data(queries)
    return data

In [4]:
data = fetch_data()
allroles = data['allroles'].copy()
pers = data['pers'].copy()
viewperstaxid = data['viewperstaxid'].copy()
wh_addr = data['wh_addr'].copy()
persaddruse = data['persaddruse'].copy()

In [5]:
def append_tax_id_to_pers(pers: pd.DataFrame, viewperstaxid: pd.DataFrame) -> pd.DataFrame:
    """
    Creating a pers table with SSN. This will later be added to with credit score
    """
    assert pers['persnbr'].is_unique, "Duplicates exist"
    assert viewperstaxid['persnbr'].is_unique, "Duplicates exist"

    df = pd.merge(pers, viewperstaxid, on='persnbr', how='left')

    return df

In [6]:
pers = append_tax_id_to_pers(pers, viewperstaxid)


In [7]:
pers

Unnamed: 0,persnbr,firstname,lastname,datebirth,deathnotificationdate,taxid
0,127,DEPOSIT8,COCC,NaT,NaT,
1,180,ROCH2,COCC,NaT,NaT,
2,181,ROCH3,COCC,NaT,NaT,
3,182,ROCH4,COCC,NaT,NaT,
4,184,PRINT6,COCC,NaT,NaT,
...,...,...,...,...,...,...
165120,1167967,JESSIE,RAMDHANY CORREA,1995-03-19,NaT,095961515
165121,1167973,THOMAS,MURPHY,1968-03-03,NaT,030627017
165122,1167983,VICTOR,COLON TORRES,1984-12-07,NaT,584993264
165123,1167985,JOHN,DEFONTES,1958-08-31,NaT,038385652


In [8]:
# Personal Guarantors extracted
def personal_guarantors(allroles, persaddruse, wh_addr, pers):
    """
    Personal Guarantor information is pulled from COCC and several tables are merged.
    
    Args:
        allroles: ALLROLES table (COCC)
        persaddruse: PERSADDRUSE table (COCC)
        wh_addr: WH_ADDR table (COCC)
        pers: WH_PERS table (COCC)
        
    Returns:
        df: Dataframe of personal guarantors
        
    Operations:
        - allroles table where 'acctrolecd' = 'GUAR' (guarantor role)
        - allroles where 'persnbr' is not null (this excludes organizations)
        - persaddruse where 'addrusecd' == 'PRI' (only primary address is considered)
        - left merge of allroles & persaddruse tables on 'persnbr'
        - left merge of df (merged df from earlier step) & wh_addr on 'addrnbr'
        - left merge of df & pers on 'persnbr'
        - filtered out unnecessary fields
            - keeping only ['acctnbr','persnbr','firstname','lastname','text1',
                            'cityname','statecd','zipcd']
    """
    allroles = allroles[allroles['acctrolecd'] == 'GUAR']
    allroles = allroles[allroles['persnbr'].notnull()]
    persaddruse = persaddruse[persaddruse['addrusecd'] == "PRI"]
    # Merge
    df = pd.merge(allroles, persaddruse, on='persnbr',how='left', suffixes=('_allroles','_persaddruse'))
    df = pd.merge(df, wh_addr, on='addrnbr',how='left', suffixes=('_df','_addr'))
    df = pd.merge(df, pers, on='persnbr', how='left', suffixes=('_df','_pers'))
    df = df[['acctnbr','persnbr','firstname','lastname','text1','cityname','statecd','zipcd','taxid']]
    return df



In [9]:
pers_data = personal_guarantors(allroles, persaddruse, wh_addr, pers)

In [10]:
target_loans = pd.read_csv(r"Z:\Credit_Loan_Review\Alerts\Development\assets\xactus\temp_data\target_loans.csv")

In [11]:
# def fetch_more_data():
#      # acctcommon
#     # engine 1

#     acctcommon = text("""
#     SELECT 
#         a.ACCTNBR,
#         a.TAXRPTFORPERSNBR
#     FROM 
#         OSIBANK.WH_ACCTCOMMON a
#     """)

        

#     queries = [
#         {'key':'acctcommon', 'sql':acctcommon, 'engine':1},
#     ]

#     data = cdutils.database.connect.retrieve_data(queries)
#     return data

In [12]:
# more_data = fetch_more_data()

In [13]:
target_loans = pd.merge(target_loans, pers_data, on='acctnbr', how='left')

In [14]:
target_loans

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,loanofficer,ownername,curracctstatcd,contractdate,...,householdnbr,Total Exposure_hhgroup,persnbr,firstname,lastname,text1,cityname,statecd,zipcd,taxid
0,150664441,2025-04-04,CML,Express Business LOC,CM57,0.00,EBL PROGRAM ADMIN,"FERNANDES & CHAREST, P.C.",ACT,2021-09-10,...,200437.0,110000.00,1107236.0,JOHNNY,CORDEIRO,32 REEVES ST,FALL RIVER,MA,02721,011708277
1,150664441,2025-04-04,CML,Express Business LOC,CM57,0.00,EBL PROGRAM ADMIN,"FERNANDES & CHAREST, P.C.",ACT,2021-09-10,...,200437.0,110000.00,1115852.0,OTTO,SCHLEINKOFER,10 HARVEST LN,BERKLEY,MA,02779,046687767
2,150304683,2025-04-04,CML,Line of Credit,CM30,100000.00,THOMAS D. KELLY,2120 PLEASANT STREET REALTY TRUST,ACT,2019-04-05,...,115347.0,350000.00,421.0,NANCY,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,019660637
3,150304683,2025-04-04,CML,Line of Credit,CM30,100000.00,THOMAS D. KELLY,2120 PLEASANT STREET REALTY TRUST,ACT,2019-04-05,...,115347.0,350000.00,1027703.0,BRIAN,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,015700643
4,600086981811,2025-04-04,CML,Line of Credit,CM30,15000.00,SBLC LOAN OFFICER,IPSCO INC,ACT,2017-07-27,...,185920.0,25000.00,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,150273862,2025-04-04,CML,Line of Credit,CM30,70000.00,SBLC LOAN OFFICER,EAST COAST SHED INC,ACT,2019-01-14,...,179134.0,188570.23,,,,,,,,
364,600143780699,2025-04-04,CML,Borrowing Base Line of Credit,CM06,0.00,ALISSA E. HALL,BI COUNTY COLLABORATIVE,ACT,2003-04-17,...,183070.0,600000.00,,,,,,,,
365,400261381666,2025-04-04,CML,Equipment Line of Credit,CM11,0.00,MARK A. BORKMAN,YANKEE LEASING CORPORATION,ACT,2015-05-21,...,191141.0,640000.00,1067999.0,JAMES,HUTZLER,53 WATER WAY,BARRINGTON,RI,02806,035449782
366,150642942,2025-04-04,CML,Line of Credit,CM30,91141.56,SBLC LOAN OFFICER,"M.O.L.I.F.E., INC.",ACT,2021-08-18,...,151720.0,100000.00,,,,,,,,


In [15]:
assert target_loans['persnbr'].is_unique, "Duplicates"

AssertionError: Duplicates

In [None]:
# is_duplicate = target_loans['persnbr'].duplicated(keep=False)
# duplicates_df = target_loans[is_duplicate]
# duplicates_df = duplicates_df.sort_values('persnbr')

In [None]:
# duplicates_df

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,loanofficer,ownername,curracctstatcd,contractdate,...,householdnbr,Total Exposure_hhgroup,persnbr,firstname,lastname,text1,cityname,statecd,zipcd,taxid
2,150304683,2025-04-04,CML,Line of Credit,CM30,100000.00,THOMAS D. KELLY,2120 PLEASANT STREET REALTY TRUST,ACT,2019-04-05,...,115347.0,350000.00,421.0,NANCY,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,019660637
114,150972969,2025-04-04,CML,Line of Credit,CM30,87200.00,THOMAS D. KELLY,THIRTY NINE NORTH PLEASANT STREET REALTY TRUST,ACT,2023-11-16,...,115347.0,350000.00,421.0,NANCY,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,019660637
115,150972969,2025-04-04,CML,Line of Credit,CM30,87200.00,THOMAS D. KELLY,THIRTY NINE NORTH PLEASANT STREET REALTY TRUST,ACT,2023-11-16,...,115347.0,350000.00,1027703.0,BRIAN,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,015700643
3,150304683,2025-04-04,CML,Line of Credit,CM30,100000.00,THOMAS D. KELLY,2120 PLEASANT STREET REALTY TRUST,ACT,2019-04-05,...,115347.0,350000.00,1027703.0,BRIAN,DASILVA,2265 WHEELER ST,NORTH DIGHTON,MA,02764,015700643
253,500273181780,2025-04-04,CML,Line of Credit,CM30,0.00,BRANDON CANNATA,NICKEL RENTALS LLC,ACT,2017-01-11,...,186813.0,272000.00,1033137.0,MARIO,CARVALHO,18 FRANK ST,ACUSHNET,MA,02743,028528834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,151088814,2025-04-04,CML,Line of Credit,CM30,0.00,SBLC LOAN OFFICER,"MASTER BUILDER MINISTRIES, INC.",ACT,2024-08-15,...,272087.0,250000.00,,,,,,,,
356,4102593,2025-04-04,CML,FNB - CML Line of Credit,CM62,0.00,SBLC LOAN OFFICER,WOMEN'S FUND OF RHODE ISLAND,ACT,2013-11-13,...,230962.0,25000.00,,,,,,,,
363,150273862,2025-04-04,CML,Line of Credit,CM30,70000.00,SBLC LOAN OFFICER,EAST COAST SHED INC,ACT,2019-01-14,...,179134.0,188570.23,,,,,,,,
364,600143780699,2025-04-04,CML,Borrowing Base Line of Credit,CM06,0.00,ALISSA E. HALL,BI COUNTY COLLABORATIVE,ACT,2003-04-17,...,183070.0,600000.00,,,,,,,,


In [18]:
target_loans_dedupe = target_loans.drop_duplicates(subset=['persnbr'], keep='first')

In [20]:
target_loans['persnbr'].nunique()

317

In [21]:
target_loans = target_loans.dropna(subset=['persnbr'])

In [23]:
from pathlib import Path

In [26]:
PERMISSIONS_PATH = Path(r"Z:\Credit_Loan_Review\Alerts\Development\assets\xactus\pfs_permission.csv")
permissions_df = pd.read_csv(PERMISSIONS_PATH)

In [27]:
permissions_df

Unnamed: 0,persnbr,firstname,lastname,Permission
0,327.0,DONALD,SMYTH,Y
1,421.0,NANCY,DASILVA,Y
2,1012786.0,MICHAEL,BRIGGS,Y
3,1021325.0,PAMELA,DUMAS,Y
4,1025471.0,TIMOTHY,DUBUC,Y
...,...,...,...,...
271,1165744.0,CARLOS,COSTA,Y
272,1165801.0,JOSHUA,ABREU,Y
273,1166597.0,JULIE,EISENHAUER,Y
274,1166896.0,OWEN,DOYLE,Y


In [28]:
permissions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   persnbr     276 non-null    float64
 1   firstname   276 non-null    object 
 2   lastname    276 non-null    object 
 3   Permission  276 non-null    object 
dtypes: float64(1), object(3)
memory usage: 8.8+ KB


In [29]:
target_loans.info()

<class 'pandas.core.frame.DataFrame'>
Index: 328 entries, 0 to 367
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   acctnbr                 328 non-null    int64  
 1   effdate                 328 non-null    object 
 2   mjaccttypcd             328 non-null    object 
 3   product                 328 non-null    object 
 4   currmiaccttypcd         328 non-null    object 
 5   bookbalance             328 non-null    float64
 6   loanofficer             328 non-null    object 
 7   ownername               328 non-null    object 
 8   curracctstatcd          328 non-null    object 
 9   contractdate            328 non-null    object 
 10  notebal                 328 non-null    float64
 11  cobal                   328 non-null    int64  
 12  creditlimitamt          328 non-null    int64  
 13  riskratingcd            328 non-null    object 
 14  totalpctsold            328 non-null    int64  

In [30]:
target_with_permission = pd.merge(target_loans, permissions_df, on='persnbr', how='outer', indicator=True)

In [None]:
target_with_permission

In [32]:
OUTPUT_PATH_TEMP = Path(r"Z:\Credit_Loan_Review\Alerts\Development\assets\xactus\temp_data\outbound_xactus_cut.csv")
target_with_permission.to_csv(OUTPUT_PATH_TEMP, index=False)

In [33]:
# Post permission check
INPUT_PERMISSION_CHECK = Path(r"Z:\Credit_Loan_Review\Alerts\Development\assets\xactus\temp_data\post_permission_check.xlsx")


In [34]:
permission_update2 = pd.read_excel(INPUT_PERMISSION_CHECK)

In [37]:
import numpy as np

In [39]:
permission_update2['Permission'] = np.where(permission_update2['Permission'] == 'y', "Y", permission_update2['Permission'])

In [40]:
permission_update2['Permission'].unique()

array(['Y', nan], dtype=object)

In [50]:
valid = permission_update2[permission_update2['Permission'] == "Y"].copy()

In [51]:
valid.info()

<class 'pandas.core.frame.DataFrame'>
Index: 333 entries, 0 to 334
Data columns (total 35 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   acctnbr                 326 non-null    float64       
 1   effdate                 326 non-null    datetime64[ns]
 2   mjaccttypcd             326 non-null    object        
 3   product                 326 non-null    object        
 4   currmiaccttypcd         326 non-null    object        
 5   bookbalance             326 non-null    float64       
 6   loanofficer             326 non-null    object        
 7   ownername               326 non-null    object        
 8   curracctstatcd          326 non-null    object        
 9   contractdate            326 non-null    datetime64[ns]
 10  notebal                 326 non-null    float64       
 11  cobal                   326 non-null    float64       
 12  creditlimitamt          326 non-null    float64       


In [44]:
valid['firstname'] = valid['firstname_x']
valid['lastname'] = valid['lastname_x']

In [None]:
valid = valid[['persnbr',]]

Unnamed: 0,acctnbr,effdate,mjaccttypcd,product,currmiaccttypcd,bookbalance,loanofficer,ownername,curracctstatcd,contractdate,...,cityname,statecd,zipcd,taxid,firstname_y,lastname_y,Permission,_merge,firstname,lastname
0,151135144.0,2025-04-04,CML,Line of Credit,CM30,0.0,THOMAS D. KELLY,KEELAND NOMINEE TRUST,ACT,2025-01-09,...,ATTLEBORO,MA,2703.0,42404910.0,DONALD,SMYTH,Y,both,DONALD,SMYTH
1,150304683.0,2025-04-04,CML,Line of Credit,CM30,100000.0,THOMAS D. KELLY,2120 PLEASANT STREET REALTY TRUST,ACT,2019-04-05,...,NORTH DIGHTON,MA,2764.0,19660637.0,NANCY,DASILVA,Y,both,NANCY,DASILVA
2,150972969.0,2025-04-04,CML,Line of Credit,CM30,87200.0,THOMAS D. KELLY,THIRTY NINE NORTH PLEASANT STREET REALTY TRUST,ACT,2023-11-16,...,NORTH DIGHTON,MA,2764.0,19660637.0,NANCY,DASILVA,Y,both,NANCY,DASILVA
3,151144054.0,2025-04-04,CML,Line of Credit,CM30,0.0,SBLC LOAN OFFICER,"INSURANCE RECONSTRUCTION SERVICES, INC.",ACT,2025-02-27,...,BROOKLYN,CT,6234.0,35524004.0,,,Y,left_only,ERIC,ANDERSON
4,150447897.0,2025-04-04,CML,Express Business SBA LOC,CM56,0.0,EBL PROGRAM ADMIN,OAKHILL LANDSCAPING INC,ACT,2020-03-11,...,TAUNTON,MA,2780.0,30625641.0,,,Y,left_only,LUKE,ANDERSON
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,151075192.0,2025-04-04,CML,SBA Secured Line of Credit,CM52,0.0,ALISSA E. HALL,REDTAIL LEASING INC,ACT,2024-07-17,...,FOXBOROUGH,MA,2035.0,11645508.0,JULIE,EISENHAUER,Y,both,JULIE,EISENHAUER
331,151080828.0,2025-04-04,CML,Express Business LOC,CM57,0.0,EBL PROGRAM ADMIN,OT LLC,ACT,2024-07-22,...,PAWTUCKET,RI,2860.0,138880574.0,OWEN,DOYLE,Y,both,OWEN,DOYLE
332,151080828.0,2025-04-04,CML,Express Business LOC,CM57,0.0,EBL PROGRAM ADMIN,OT LLC,ACT,2024-07-22,...,COVENTRY,RI,2816.0,37605510.0,TRAVIS,GERVASIO,Y,both,TRAVIS,GERVASIO
333,151155506.0,2025-04-04,CML,Express Business SBA LOC,CM56,7250.0,EBL PROGRAM ADMIN,MAYYA ENVIRONMENTAL LLC,ACT,2025-03-11,...,NEW BEDFORD,MA,2740.0,11681023.0,,,Y,left_only,YMANE,GALOTTI


In [47]:
valid = valid[['persnbr','firstname','lastname','Permission']].copy()

In [48]:
valid

Unnamed: 0,persnbr,firstname,lastname,Permission
0,327,DONALD,SMYTH,Y
1,421,NANCY,DASILVA,Y
2,421,NANCY,DASILVA,Y
3,1000637,ERIC,ANDERSON,Y
4,1000763,LUKE,ANDERSON,Y
...,...,...,...,...
330,1166597,JULIE,EISENHAUER,Y
331,1166896,OWEN,DOYLE,Y
332,1167037,TRAVIS,GERVASIO,Y
333,1170104,YMANE,GALOTTI,Y


In [49]:
valid.to_csv(PERMISSIONS_PATH, index=False)