In [None]:
import os
import sys
from pathlib import Path

# Locate the project root (the directory that holds "src") and make it the
# working directory.
# When executed as a script, start from this file's folder; in a notebook
# kernel __file__ is undefined, so start from the current working directory.
_start_dir = Path(__file__).parent if '__file__' in globals() else Path.cwd()

# Unconditionally stepping to the parent is not idempotent in a notebook: a
# second run of this cell starts from the directory the first run already
# moved to and climbs one level too far. If the starting directory already
# contains "src", it is the project root, so stay there.
project_dir = _start_dir if (_start_dir / "src").is_dir() else _start_dir.parent
os.chdir(project_dir)

# Add src directory to Python path for imports (inserted once, at the front)
src_dir = project_dir / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Set environment for dev testing
os.environ['REPORT_ENV'] = 'dev'

In [None]:
# Notebook cell: echo the resolved project root so it can be checked by eye.
project_dir

In [None]:
"""
Active Account & Agreement Analysis - Main Entry Point

BUSINESS LOGIC EXTRACTED:

Data Sources & Relationships:
- daily_acct_file: Active accounts dataset (already available)
- WH_AGREEMENTS: Agreement data with OWNERORGNBR and OWNERPERSNBR
- WH_ALLROLES: Links agreements to active accounts via role relationships
- WH_ORG: Organization names (orgnbr matches OWNERORGNBR)
- WH_PERS: Person names (persnbr matches OWNERPERSNBR)

Business Rules:
- Active accounts linked to agreements through WH_ALLROLES
- Agreements filtered for active status only
- Organization and person names added from WH_ORG/WH_PERS
- Deduplication applied to org/pers tables on primary keys
- Primary keys enforced as string type for consistency

Data Processing Flow:
1. Load active accounts from daily_acct_file
2. Load and deduplicate WH_ORG and WH_PERS with schema enforcement
3. Load WH_AGREEMENTS and link to accounts via WH_ALLROLES
4. Filter for active agreements only
5. Add organization and person names
6. Output two datasets: active accounts and active agreements
7. Monthly delivery to Retail Department for cross-sell analysis

Business Intelligence Value:
- Cross-sell opportunity analysis for retail
- Active account and agreement relationship mapping
- Customer engagement and product penetration insights
- Monthly reporting for retail department initiatives
"""
from pathlib import Path
from typing import List
from datetime import datetime

import pandas as pd # type: ignore

import src.config
import src.active_acct_analysis.fetch_data # type: ignore
from cdutils import input_cleansing # type: ignore
from cdutils import deduplication # type: ignore
import cdutils.acct_file_creation.core # type: ignore


# def main():
#     """Main report execution function for Active Account & Agreement Analysis"""
    
# Create the report output folder up front; existing folders are left alone.
output_dir = src.config.OUTPUT_DIR
output_dir.mkdir(parents=True, exist_ok=True)

# Echo the run configuration before any data is pulled.
print(f"Environment: {src.config.ENV}")
print(f"Output directory: {output_dir}")

# Step 1: Active accounts come from the shared account-file helper
print("Loading active accounts...")
active_accounts = cdutils.acct_file_creation.core.query_df_on_date()
account_count = len(active_accounts)
print(f"Loaded {account_count} active accounts")

# Step 2: Pull the base tables; later cells read them from `data` by key
print("Loading base data...")
data = src.active_acct_analysis.fetch_data.fetch_data()



In [None]:
# Step 3: Organization lookup table (WH_ORG)
print("Loading WH_ORG...")

# Request str for the key and name columns, applied to a private copy of the
# fetched table so `data` itself is not modified by this cell.
schema_wh_org = {'orgnbr': str, 'orgname': str}
wh_org = input_cleansing.enforce_schema(data['wh_org'].copy(), schema_wh_org)

# Deduplicate on orgnbr through the shared cdutils helper
dedupe_list = [{'df': wh_org, 'field': 'orgnbr'}]
wh_org = deduplication.dedupe(dedupe_list)
print(f"Loaded {len(wh_org)} unique organizations")

# Step 4: Person lookup table (WH_PERS), prepared the same way
print("Loading WH_PERS...")

# Request str for the key and name columns on a private copy
schema_wh_pers = {'persnbr': str, 'persname': str}
wh_pers = input_cleansing.enforce_schema(data['wh_pers'].copy(), schema_wh_pers)

# Deduplicate on persnbr through the shared cdutils helper
dedupe_list = [{'df': wh_pers, 'field': 'persnbr'}]
wh_pers = deduplication.dedupe(dedupe_list)
print(f"Loaded {len(wh_pers)} unique persons")



In [None]:
# Notebook cell: echo the agreements table for inspection.
# NOTE(review): in the visible source wh_agreements is first assigned in a
# later cell (Step 5), so running the cells top-to-bottom on a fresh kernel
# would raise NameError here - confirm the intended cell order.
wh_agreements

In [None]:
# Notebook cell: call .info() on the agreements table for a quick look at it.
# NOTE(review): in the visible source wh_agreements is first assigned in a
# later cell (Step 5), so this cell depends on that one having run first -
# confirm the intended cell order.
wh_agreements.info()

In [None]:
# Step 5: Load WH_AGREEMENTS and filter for active agreements
print("Loading WH_AGREEMENTS...")
wh_agreements = data['wh_agreement'].copy()

# Enforce schema for WH_AGREEMENTS: every identifier column is requested as
# str, matching the str keys requested for WH_ORG / WH_PERS.
schema_wh_agreements = {
    'acctnbr': str,
    'agreenbr': str,
    'persnbr': str,
    'ownerpersnbr': str,
    'ownerorgnbr': str
}
wh_agreements = input_cleansing.enforce_schema(wh_agreements, schema_wh_agreements)

# Filter for active agreements using inactivedate vs rundate logic:
# an agreement is active when inactivedate is null OR inactivedate > rundate.
# NOTE(review): the comparison assumes both columns hold comparable date
# values - confirm the dtypes returned by fetch_data.
active_agreements = wh_agreements[
    (wh_agreements['inactivedate'].isnull()) |
    (wh_agreements['inactivedate'] > wh_agreements['rundate'])
].copy()

print(f"Found {len(active_agreements)} active agreements out of {len(wh_agreements)} total")

# Step 6: Add organization and person names to agreements
print("Adding organization and person names...")

# Add organization names for agreements with ownerorgnbr.
# Left join keeps every active agreement even when no organization matches.
active_agreements = pd.merge(
    active_agreements,
    wh_org[['orgnbr', 'orgname']],
    left_on='ownerorgnbr',
    right_on='orgnbr',
    how='left',
    suffixes=('', '_org')
)

# Add person names for agreements with ownerpersnbr.
# The agreement table already has its own persnbr column, so the lookup's
# key column arrives as persnbr_pers and the original persnbr is untouched.
active_agreements = pd.merge(
    active_agreements,
    wh_pers[['persnbr', 'persname']],
    left_on='ownerpersnbr',
    right_on='persnbr',
    how='left',
    suffixes=('', '_pers')
)

# Create a combined owner name field: organization name first, then person
# name, then a fixed placeholder. Done column-wise with fillna instead of a
# row-by-row apply, which gives the same precedence without invoking a
# Python lambda per row and always produces a Series.
active_agreements['owner_name'] = (
    active_agreements['orgname']
    .fillna(active_agreements['persname'])
    .fillna('Unknown Owner')
)

# Step 7: Prepare final datasets
print("Preparing final datasets...")

# Active Accounts Dataset - keep as is (copied so later edits do not touch
# the loaded frame)
active_accounts_final = active_accounts.copy()



In [None]:
# Active Agreements Dataset - keep only the columns delivered in the report
agreement_columns = [
    'acctnbr', 'agreenbr', 'persnbr', 'agrmntnbr', 'agrmntstatcd',
    'ownerpersnbr', 'ownerorgnbr', 'owner_name', 'rundate', 'effdate', 'inactivedate',
    'agreetypcd', 'cardnbr', 'datelastmaint',
]
active_agreements_final = active_agreements[agreement_columns].copy()

# Step 8: Output to Excel files
print("Generating output files...")

# File names carry the run date as "<Month name> <day> <year>"; the day is
# taken from the attribute, so it is not zero-padded.
today = datetime.today()
date_str = f"{today:%B} {today.day} {today.year}"

# Resolve both destinations before writing anything
accounts_filename = f'Active Accounts {date_str}.xlsx'
agreements_filename = f'Active Agreements {date_str}.xlsx'
accounts_output_path = src.config.OUTPUT_DIR / accounts_filename
agreements_output_path = src.config.OUTPUT_DIR / agreements_filename

# Output Active Accounts
active_accounts_final.to_excel(
    accounts_output_path,
    sheet_name='Active Accounts',
    index=False,
)
print(f"Active accounts saved to: {accounts_output_path}")

# Output Active Agreements
active_agreements_final.to_excel(
    agreements_output_path,
    sheet_name='Active Agreements',
    index=False,
)
print(f"Active agreements saved to: {agreements_output_path}")

# Summary statistics
account_total = len(active_accounts_final)
agreement_total = len(active_agreements_final)
distinct_agreements = active_agreements_final['agrmntnbr'].nunique()
distinct_accounts = active_agreements_final['acctnbr'].nunique()
type_breakdown = active_agreements_final['agreetypcd'].value_counts().to_dict()

print("\nSummary:")
print(f"- Active Accounts: {account_total}")
print(f"- Active Agreements: {agreement_total}")
print(f"- Unique Agreement Numbers: {distinct_agreements}")
print(f"- Unique Account Numbers in Agreements: {distinct_accounts}")
print(f"- Agreement Types: {type_breakdown}")

# Distribution (currently disabled - enable when recipients are determined).
# Nothing is emailed unless recipients are configured.
if not src.config.EMAIL_TO:
    print("Development mode or no recipients configured - email not sent.")
    print(f"Output files: {accounts_output_path}, {agreements_output_path}")
else:
    # Imported here so the mail helper is only loaded when a send happens
    from cdutils import distribution # type: ignore

    email_subject = f"Active Account & Agreement Analysis - {date_str}"

    email_body = """Hi,

Attached are the Active Account and Agreement datasets for cross-sell analysis. 

The files include:
1. Active Accounts - Current active account portfolio
2. Active Agreements - Active agreements with owner information (filtered by inactivedate vs rundate)

If you have any questions, please reach out to BusinessIntelligence@bcsbmail.com

Thanks!"""

    # NOTE(review): EMAIL_CC is passed as bcc_recipients while the log line
    # below reports it as "CC" - confirm which is intended.
    distribution.email_out(
        recipients=src.config.EMAIL_TO,
        bcc_recipients=src.config.EMAIL_CC,
        subject=email_subject,
        body=email_body,
        attachment_paths=[accounts_output_path, agreements_output_path]
    )
    print(f"Email sent to {len(src.config.EMAIL_TO)} recipients with {len(src.config.EMAIL_CC)} CC")


# if __name__ == '__main__':
#     print("Starting Active Account & Agreement Analysis")
#     main()
#     print("Complete!")