In [57]:
# reporting/quarterly_report/modules/granting.py
from __future__ import annotations

import logging, sqlite3, datetime
from pathlib import Path
from typing import List
import calendar
import pandas as pd
from datetime import date
from typing import List, Tuple,Union
import numpy as np
from great_tables import GT, loc, style, html
import re

# our project
from ingestion.db_utils import (
    init_db,                                 # create tables if missing
    fetch_latest_table_data,                 # new version!
    get_alias_last_load,
    get_variable_status, 
    load_report_params                   # to inspect results
)

from reporting.quarterly_report.utils import RenderContext, BaseModule
from reporting.quarterly_report.report_utils.granting_utils import enrich_grants, _ensure_timedelta_cols, _coerce_date_columns
from reporting.quarterly_report.utils import Database, RenderContext


import altair as alt
from altair_saver import save
import selenium.webdriver



# ─────────────────────────────────────────────────────────────
# 2) open DB – change path if you work on a copy
# ─────────────────────────────────────────────────────────────
# Single source of truth for the database location: the string form is kept
# for callers that are handed `db_path`, the Path form is derived from it so
# the two can never drift apart.
db_path = "database/reporting.db"
DB_PATH = Path(db_path)

init_db(db_path=DB_PATH)            # no-op if tables already exist

db = Database(str(DB_PATH))         # thin sqlite3 wrapper
conn = db.conn
report = 'Quarterly_Report'

# Call-type markers searched (case-sensitively, as substrings) by the
# determine_po_category* helpers. Order matters: the first entry found wins,
# and the mixed-case spellings exist because the match is case-sensitive.
CALLS_TYPES_LIST = ['STG', 'ADG', 'POC', 'COG', 'SYG', 'StG', 'CoG', 'AdG', 'SyG', 'PoC', 'CSA']
# ──────────────────────────────────────────────────────────────
# HELPERS
# ──────────────────────────────────────────────────────────────

def determine_epoch_year(cutoff_date: pd.Timestamp) -> int:
    """
    Return the year the report covers for the given cutoff date.

    A cutoff that falls in January reports on the previous calendar year;
    a cutoff in any other month reports on the cutoff's own year.
    """
    if cutoff_date.month == 1:
        return cutoff_date.year - 1
    return cutoff_date.year



def get_scope_start_end(cutoff: pd.Timestamp) -> Tuple[pd.Timestamp, pd.Timestamp]:
    """
    Return the ``(start, end)`` timestamps of the reporting scope.

    • January cutoff → the whole previous calendar year (1 Jan – 31 Dec)
    • Any other month → 1 January of the cutoff year up to the end of the
      quarter containing the month that *precedes* the cutoff month
    """
    if cutoff.month == 1:
        previous_year = cutoff.year - 1
        year_start = pd.Timestamp(year=previous_year, month=1, day=1)
        year_end = pd.Timestamp(year=previous_year, month=12, day=31)
        return year_start, year_end

    # The quarter is picked from the month before the cutoff, not the cutoff
    # month itself (e.g. a cutoff in April closes Q1).
    month_start = cutoff.replace(day=1)
    previous_month = (month_start - pd.offsets.MonthBegin()).month

    # (last month of the quarter, last day of that month), in calendar order;
    # Q4 doubles as the fallback when no earlier quarter applies.
    quarter_closings = ((3, 31), (6, 30), (9, 30))
    closing_month, closing_day = next(
        (
            (month, day)
            for month, day in quarter_closings
            if previous_month <= month
        ),
        (12, 31),
    )

    scope_start = pd.Timestamp(year=cutoff.year, month=1, day=1)
    scope_end = pd.Timestamp(year=cutoff.year, month=closing_month, day=closing_day)
    return scope_start, scope_end



def months_in_scope(cutoff: pd.Timestamp) -> list[str]:
    """
    List the month names from January through the last full month before
    ``cutoff``.

    A January cutoff rolls over to the previous year and therefore yields
    all twelve months.
    """
    january_cutoff = cutoff.month == 1
    scope_year = cutoff.year - 1 if january_cutoff else cutoff.year
    final_month = 12 if january_cutoff else cutoff.month - 1

    # One entry per month start, January .. final_month inclusive.
    month_starts = pd.date_range(
        start=pd.Timestamp(year=scope_year, month=1, day=1),
        end=pd.Timestamp(year=scope_year, month=final_month, day=1),
        freq="MS",
    )
    return month_starts.strftime("%B").tolist()

def determine_po_category(row, call_types=None):
    """
    Derive the upper-cased call type for a call-overview row.

    ``Topic`` is searched first; ``Instrument`` is only consulted when
    ``Topic`` is empty or contains none of the known markers. Matching is a
    case-sensitive substring test, and the first marker (in list order) that
    occurs in the text wins.

    Args:
        row: Mapping-like object exposing ``.get`` (pandas Series, dict, ...).
        call_types: Optional sequence of markers to search for. Defaults to
            the module-level ``CALLS_TYPES_LIST``.

    Returns:
        The matched marker in upper case, or ``''`` when nothing matches.
    """
    markers = CALLS_TYPES_LIST if call_types is None else call_types

    for column in ('Topic', 'Instrument'):
        text = str(row.get(column, '')).strip()
        if not text:
            continue
        # Single pass per column: the first marker present decides the category.
        hit = next((marker for marker in markers if marker in text), None)
        if hit is not None:
            return hit.upper()

    return ''

def determine_po_category_po_list(row, call_types=None):
    """
    Derive the upper-cased call type for a purchase-order row.

    ``PO Purchase Order Item Desc`` is searched first;
    ``PO ABAC SAP Reference`` is only consulted when the description is empty
    or contains none of the known markers. Matching is a case-sensitive
    substring test, and the first marker (in list order) that occurs in the
    text wins.

    Args:
        row: Mapping-like object exposing ``.get`` (pandas Series, dict, ...).
        call_types: Optional sequence of markers to search for. Defaults to
            the module-level ``CALLS_TYPES_LIST``.

    Returns:
        The matched marker in upper case, or ``''`` when nothing matches.
    """
    markers = CALLS_TYPES_LIST if call_types is None else call_types

    for column in ('PO Purchase Order Item Desc', 'PO ABAC SAP Reference'):
        text = str(row.get(column, '')).strip()
        if not text:
            continue
        # Single pass per column: the first marker present decides the category.
        hit = next((marker for marker in markers if marker in text), None)
        if hit is not None:
            return hit.upper()

    return ''

def extract_project_number(row):
    """
    Resolve the project number for a payment row.

    When 'v_check_payment_type' contains an RP marker (``RP<n>=FP|IP`` or
    ``RP<n>-FP|IP``), the leading digits of 'Inv Text' are returned as a
    string. In every other case – missing payment type, no RP marker, missing
    'Inv Text', or no leading digits – the original 'v_check_payment_type'
    value is returned unchanged.
    """
    original_value = row['v_check_payment_type']
    invoice_text = row['Inv Text']

    # Missing payment type: nothing to inspect, hand it back as-is.
    if pd.isna(original_value):
        return original_value

    marker_patterns = (
        r'RP\d+=(?:FP|IP)',  # e.g. RP4=FP, RP2=IP
        r'RP\d+-(?:FP|IP)',  # e.g. RP4-FP, RP2-IP
    )
    as_text = str(original_value)  # tolerate non-string cell values
    for pattern in marker_patterns:
        if re.search(pattern, as_text):
            break
    else:
        # No RP marker at all.
        return original_value

    if pd.isna(invoice_text):
        return original_value

    # Only digits at the very start of the (stripped) invoice text count.
    leading_digits = re.match(r'^(\d+)', str(invoice_text).strip())
    if leading_digits is None:
        return original_value
    return leading_digits.group(1)


def map_project_to_call_type(project_num, mapping_dict):
    """
    Translate a project number into its call type via ``mapping_dict``.

    The project number is converted with ``int()`` and looked up as an
    integer key. If the conversion fails (e.g. 'EXPERTS', None) or the key is
    absent, the original ``project_num`` is returned untouched.
    """
    fallback = project_num
    try:
        lookup_key = int(project_num)
        return mapping_dict[lookup_key] if lookup_key in mapping_dict else fallback
    except (ValueError, TypeError):
        # Not an integer-like value – keep whatever was passed in.
        return fallback

def map_call_type_with_experts(row, grant_map):
    """
    Resolve a payment row's call type from 'project_number', with an
    'EXPERTS' fallback.

    Resolution order:
      1. ``int(project_number)`` found in ``grant_map`` → mapped call type
      2. 'project_number' or 'v_payment_type' equals 'EXPERTS'
         (case-insensitive) → 'EXPERTS'
      3. otherwise → the original 'project_number'
    """
    project_ref = row['project_number']
    payment_kind = row['v_payment_type']

    try:
        grant_key = int(project_ref)
        if grant_key in grant_map:
            return grant_map[grant_key]
    except (ValueError, TypeError):
        # Non-numeric project reference: fall through to the EXPERTS check.
        pass

    normalised = (str(project_ref).upper(), str(payment_kind).upper())
    return 'EXPERTS' if 'EXPERTS' in normalised else project_ref

def map_payment_type(row):
    """
    Return the row's payment type, relabelling 'Other' as 'EXPERTS' when the
    'Pay Workflow Last AOS Person Id' is 'WALASOU'.

    The person id is only read for rows whose payment type is 'Other'.
    """
    payment_kind = row['v_payment_type']
    if payment_kind != 'Other':
        return payment_kind
    if row['Pay Workflow Last AOS Person Id'] == 'WALASOU':
        return 'EXPERTS'
    return payment_kind

# Key normalisation lives in the mapping function so callers can pass the raw
# column value (str, float, int, NaN) without converting the column first.
def safe_map_project_to_call_type(project_num, mapping_dict):
    """
    Map a project / purchase-order number to its call type.

    The value is normalised to an ``int`` key before the lookup:
      * integers are used as-is (no float round-trip, so large keys stay exact)
      * strings are parsed as integers first; '4500053782.0'-style strings
        have the trailing '.0' removed; anything else goes through ``float``
      * other numeric values go through ``int(float(value))``

    Args:
        project_num: Raw key in any of the forms above; may be NaN/None.
        mapping_dict: Mapping from integer key to call type.

    Returns:
        The mapped call type, or ``None`` when the value is missing, cannot be
        converted, is not in ``mapping_dict``, or maps to an empty/NaN value.
    """
    try:
        if pd.isna(project_num):
            return None

        if isinstance(project_num, (int, np.integer)):
            numeric_key = int(project_num)
        elif isinstance(project_num, str):
            text = project_num.strip()
            try:
                # Exact parse first: float() would corrupt keys above 2**53.
                numeric_key = int(text)
            except ValueError:
                if text.endswith('.0'):
                    numeric_key = int(text[:-2])
                else:
                    numeric_key = int(float(text))
        else:
            numeric_key = int(float(project_num))

        if numeric_key in mapping_dict:
            result = mapping_dict[numeric_key]
            if pd.notna(result) and result != '':
                return result

    except (ValueError, TypeError, OverflowError):
        # Unconvertible input (text, inf, unsupported type): treated as "no match".
        pass

    return None

# Row-wise fallback that reads the PO key as stored, so the PO column itself
# never has to be converted beforehand.
def apply_conditional_mapping(row):
    """
    Fill in or override a row's 'call_type' from the module-level ``po_map``.

    The existing value is kept when it is a known entry of
    ``CALLS_TYPES_LIST`` other than 'CSA'. Otherwise (missing, empty, unknown,
    'EXPERTS' or 'CSA') the row's 'PO Purchase Order Key' is looked up through
    ``safe_map_project_to_call_type``; if that yields nothing, the existing
    value is returned unchanged.
    """
    existing = row['call_type']
    purchase_order_key = row['PO Purchase Order Key']  # passed through untouched

    keep_existing = (
        pd.notna(existing)
        and existing != ''
        and existing in CALLS_TYPES_LIST
        and existing not in ['EXPERTS', 'CSA']
    )
    if keep_existing:
        return existing

    replacement = safe_map_project_to_call_type(purchase_order_key, po_map)
    if replacement is None:
        return existing
    return replacement



In [66]:
# Aliases of the ingested tables read by this notebook.
PAYMENTS_ALIAS = "payments_summa"
PAYMENTS_TIMES_ALIAS = "payments_summa_time"
CALLS_ALIAS = "call_overview"
PO_ALIAS = "c0_po_summa"

# Reporting cutoff handed to every fetch below.
cutoff = pd.to_datetime("2025-04-15")
report_params = load_report_params(report_name=report, db_path=db_path)

# Table palette: values from the report parameters win, the hex codes are
# the fallbacks.
table_colors = report_params.get('TABLE_COLORS', {})
BLUE, LIGHT_BLUE, DARK_BLUE, SUB_TOTAL_BACKGROUND = (
    table_colors.get(color_key, fallback_hex)
    for color_key, fallback_hex in (
        ("BLUE", "#004A99"),
        ("LIGHT_BLUE", "#d6e6f4"),
        ("DARK_BLUE", "#01244B"),
        ("subtotal_background_color", "#E6E6FA"),
    )
)

# Fetch order: payments, payment times, calls, purchase orders.
df_paym, df_paym_times, df_calls, df_po = (
    fetch_latest_table_data(conn, table_alias, cutoff)
    for table_alias in (PAYMENTS_ALIAS, PAYMENTS_TIMES_ALIAS, CALLS_ALIAS, PO_ALIAS)
)

DEBUG:root:Fetching latest data for table_alias: payments_summa, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query results for payments_summa: [('2025-06-03T06:26:12.795557', 1)]
DEBUG:root:Checking upload_id: 1, uploaded_at: 2025-06-03T06:26:12.795557
DEBUG:root:Fetched 6391 rows from payments_summa with upload_id 1
DEBUG:root:Fetching latest data for table_alias: payments_summa_time, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query results for payments_summa_time: [('2025-06-03T06:26:38.788420', 2)]
DEBUG:root:Checking upload_id: 2, uploaded_at: 2025-06-03T06:26:38.788420
DEBUG:root:Fetched 4992 rows from payments_summa_time with upload_id 2
DEBUG:root:Fetching latest data for table_alias: call_overview, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query results for call_overview: [('2025-06-03T06:28:40.771822', 3)]
DEBUG:root:Checking upload_id: 3, uploaded_at: 2025-06-03T06:28:40.771822
DEBUG:root:Fetched 13295 rows from call_overview with upload_id 3
DEBUG:root:F

In [67]:
# ── 1) Payment rows in scope ─────────────────────────────────
df_paym['v_payment_type'] = df_paym.apply(map_payment_type, axis=1)

# One combined mask plus an explicit copy: the columns assigned below are
# then written to an independent frame instead of a slice of the fetched
# data (avoids pandas' SettingWithCopy ambiguity).
in_scope = (
    df_paym['Pay Document Type Desc'].isin(['Payment Directive', 'Exp Pre-financing'])
    & (df_paym['v_payment_type'] != 'Other')
    & df_paym['Pay Payment Key'].notnull()
)
df_paym = df_paym[in_scope].copy()

df_paym['project_number'] = df_paym.apply(extract_project_number, axis=1)

# ── 2) Lookup tables ─────────────────────────────────────────
# Grant number → call type, derived from the call overview.
# NOTE(review): the mapping helpers look keys up as int; this assumes
# 'Grant Number' is stored numerically — confirm against the ingested data.
df_calls['CALL_TYPE'] = df_calls.apply(determine_po_category, axis=1)
grant_map = df_calls.set_index('Grant Number')['CALL_TYPE'].to_dict()

# Purchase-order key → call type; rows without a detected call type are
# left out of the map.
df_po['CALL_TYPE'] = df_po.apply(determine_po_category_po_list, axis=1)

po_map = df_po[
    df_po['CALL_TYPE'].notna()
    & (df_po['CALL_TYPE'].str.strip() != '')
].set_index('PO Purchase Order Key')['CALL_TYPE'].to_dict()

# ── 3) call_type per payment ─────────────────────────────────
# map_call_type_with_experts already performs the grant_map lookup, so a
# separate map_project_to_call_type pass beforehand would just be overwritten.
df_paym['call_type'] = df_paym.apply(lambda row: map_call_type_with_experts(row, grant_map), axis=1)

# Clean call_type column only (not PO keys). astype(str) renders missing
# values as 'nan' / 'None' / '<NA>', so all of those become real NaN again.
df_paym['call_type'] = (
    df_paym['call_type']
    .astype(str)
    .str.strip()
    .replace(['nan', 'None', '<NA>', ''], np.nan)
)

# Fall back to the purchase-order map where the grant lookup gave no usable
# call type. Runs before the PO key is converted so the key is read as stored.
df_paym['call_type'] = df_paym.apply(apply_conditional_mapping, axis=1)

# Nullable integer dtype: unparseable keys become <NA> instead of raising.
df_paym['PO Purchase Order Key'] = pd.to_numeric(df_paym['PO Purchase Order Key'], errors='coerce').astype('Int64')

# ── 4) Restrict to the reporting scope ───────────────────────
df_paym['Pay Workflow Last AOS Action Date'] = pd.to_datetime(
    df_paym['Pay Workflow Last AOS Action Date'],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce'
)

quarter_dates = get_scope_start_end(cutoff=cutoff)
last_valid_date = quarter_dates[1]

# Rows whose action date failed to parse (NaT) compare False and are dropped.
df_paym = df_paym[
    df_paym['Pay Workflow Last AOS Action Date'] <= last_valid_date
].copy()

df_paym = df_paym[df_paym['call_type'] != 'CSA']

In [69]:
# Write the prepared payments frame to 'paym.xlsx' (path is relative to the
# current working directory) — presumably for manual inspection; confirm
# whether this export is still needed.
df_paym.to_excel('paym.xlsx')