In [1]:
import os
import sys
from pathlib import Path

# Resolve the project root (the equivalent of `cd ..` from the notebook folder).
# When `__file__` is defined the root is two levels above this file; otherwise
# it is the parent of the working directory.  The starting working directory is
# captured only once: the os.chdir() below changes the cwd, so recomputing
# Path.cwd().parent on a re-run of this cell would climb one more level each time.
if '_INITIAL_CWD' not in globals():
    _INITIAL_CWD = Path.cwd()
project_dir = Path(__file__).parent.parent if '__file__' in globals() else _INITIAL_CWD.parent
os.chdir(project_dir)

# Put <project root>/src at the front of sys.path so its modules are importable;
# the membership check keeps re-runs from inserting duplicate entries.
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set REPORT_ENV to 'prod'.
# NOTE(review): this step was previously described as "dev testing" although the
# value is 'prod' -- confirm which environment is actually intended here.
os.environ['REPORT_ENV'] = 'prod'

In [6]:

import cdutils.database.connect # type: ignore
from sqlalchemy import text # type: ignore
from datetime import datetime
from typing import Optional

# Define fetch_data here using cdutils.database.connect.
# Projects that are being migrated often already contain a fetch_data.py file.
def fetch_data(start_date, end_date):
    """
    Fetch all COCCDM.WH_RTXN rows whose RUNDATE lies between two bounds.

    Parameters
    ----------
    start_date, end_date
        Inclusive lower / upper bound on RUNDATE.  Objects exposing
        ``strftime`` (``datetime``, ``date``, ...) are rendered as
        ``YYYY-MM-DD HH:MM:SS``; any other value is passed through ``str()``
        and must already be in that format.  The values are interpolated
        into the SQL text, so only pass trusted values.

    Returns
    -------
    The result of ``cdutils.database.connect.retrieve_data`` for the query
    list built here; the single query is registered under the key ``'query'``.
    """
    def _as_oracle_literal(value):
        # The TO_DATE mask below is 'YYYY-MM-DD HH24:MI:SS'.  str(datetime)
        # only matches it when microsecond == 0 and str(date) has no time
        # part, so format explicitly instead of relying on str().
        if hasattr(value, 'strftime'):
            return value.strftime('%Y-%m-%d %H:%M:%S')
        return str(value)

    start_literal = _as_oracle_literal(start_date)
    end_literal = _as_oracle_literal(end_date)

    query = text(f"""
    SELECT
        *
    FROM
        COCCDM.WH_RTXN a
    WHERE
        (a.RUNDATE >= TO_DATE('{start_literal}', 'YYYY-MM-DD HH24:MI:SS')) AND
        (a.RUNDATE <= TO_DATE('{end_literal}', 'YYYY-MM-DD HH24:MI:SS'))
    """)
    # Template for an additional query (kept disabled):
    # vieworgtaxid = text(f"""
    # SELECT
    #     *
    # FROM
    #     OSIBANK.VIEWORGTAXID a
    # """)

    # Each entry names the result ('key'), the statement ('sql') and the
    # engine to run it on.
    # NOTE(review): the meaning of engine ids 1 / 2 is defined by
    # cdutils.database.connect -- confirm 2 is the intended database.
    queries = [
        {'key':'query', 'sql':query, 'engine':2},

        # {'key':'vieworgtaxid', 'sql':vieworgtaxid, 'engine':1},
    ]

    data = cdutils.database.connect.retrieve_data(queries)
    return data


In [7]:
# Query window: 1 September 2025 through 30 September 2025 (both at midnight).
start_date = datetime(year=2025, month=9, day=1)
end_date = datetime(year=2025, month=9, day=30)

In [8]:
# Run the query for the configured window; the result is indexed by query key.
data = fetch_data(start_date, end_date)

In [9]:
# Take an independent (deep) copy of the 'query' result so that later cells
# do not modify the object held in `data`.
df = data["query"].copy(deep=True)

In [10]:
# Preview the fetched frame (the rich display shows the first and last rows).
df

Unnamed: 0,acctnbr,rundate,rtxnnbr,parentrtxnnbr,applnbr,applname,rtxntypcd,rtxntypdesc,holdacctnbr,currrtxnstatcd,...,membernbr,networkcd,cardtxnnbr,datelastmaint,payto,parentacctnbr,txnfeeamt,otcpersnbr,sourceid,companyentrydesc
0,150505687,2025-09-02,5213,,5275.0,ATM_ONLINE_PROC,PWTH,POS Withdrawal,,C,...,1.0,FIS,51284566.0,2025-09-02 20:37:50,,,0,,,
1,26245604,2025-09-02,11441,,5275.0,ATM_ONLINE_PROC,PWTH,POS Withdrawal,,C,...,1.0,FIS,51284573.0,2025-09-02 20:37:50,,,0,,,
2,150886839,2025-09-02,3987,,5275.0,ATM_ONLINE_PROC,PWTH,POS Withdrawal,,C,...,1.0,FIS,51284612.0,2025-09-02 20:37:50,,,0,,,
3,23651434,2025-09-02,8089,,5275.0,ATM_ONLINE_PROC,PWTH,POS Withdrawal,,C,...,1.0,FIS,51284620.0,2025-09-02 20:37:50,,,0,,,
4,150686784,2025-09-02,8805,,5275.0,ATM_ONLINE_PROC,PWTH,POS Withdrawal,,C,...,1.0,FIS,51299213.0,2025-09-02 20:37:50,,,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1052749,27015362,2025-09-30,34234,,386.0,AH_CLEAR,CWTH,Check,,C,...,,,,2025-09-30 20:44:01,,,,,611161024,
1052750,27015362,2025-09-30,34235,,386.0,AH_CLEAR,CWTH,Check,,C,...,,,,2025-09-30 20:44:01,,,,,611161024,
1052751,27015362,2025-09-30,34236,,386.0,AH_CLEAR,CWTH,Check,,C,...,,,,2025-09-30 20:44:01,,,,,611161024,
1052752,27015362,2025-09-30,34237,,386.0,AH_CLEAR,CWTH,Check,,C,...,,,,2025-09-30 20:44:01,,,,,611161024,


In [11]:
# Store both identifier columns as strings rather than numbers.
for key_col in ("acctnbr", "rtxnnbr"):
    df[key_col] = df[key_col].astype(str)

In [12]:
# Print the column dtypes and non-null counts of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1052754 entries, 0 to 1052753
Data columns (total 42 columns):
 #   Column            Non-Null Count    Dtype         
---  ------            --------------    -----         
 0   acctnbr           1052754 non-null  object        
 1   rundate           1052754 non-null  datetime64[ns]
 2   rtxnnbr           1052754 non-null  object        
 3   parentrtxnnbr     69526 non-null    float64       
 4   applnbr           978056 non-null   float64       
 5   applname          978056 non-null   object        
 6   rtxntypcd         1052754 non-null  object        
 7   rtxntypdesc       1052754 non-null  object        
 8   holdacctnbr       139011 non-null   float64       
 9   currrtxnstatcd    1052754 non-null  object        
 10  tranamt           1052754 non-null  object        
 11  origpostdate      1052754 non-null  datetime64[ns]
 12  rtxnreasoncd      0 non-null        object        
 13  actdatetime       1052754 non-null  dateti

In [13]:
# Build a per-transaction key from account number and transaction number.
# A separator is required: both parts are variable-length digit strings, so a
# bare concatenation is ambiguous ('12' + '345' == '123' + '45').  The explicit
# astype(str) keeps this cell correct even if the columns are still numeric.
df['composite_key'] = df['acctnbr'].astype(str) + '_' + df['rtxnnbr'].astype(str)

In [14]:
# Keep only the rows whose `rtxnstatcd` equals 'C', as an independent copy.
# NOTE(review): the displayed columns include `currrtxnstatcd`; confirm that
# `rtxnstatcd` is the status column intended for this filter.
df = df.loc[df['rtxnstatcd'].eq('C')].copy()

In [None]:
# Display the current contents of `df`.
df