In [53]:
# reporting/quarterly_report/modules/granting.py
from __future__ import annotations

import logging, sqlite3, datetime
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
from great_tables import GT, loc, style, html

# our project
from ingestion.db_utils import (
    init_db,                                 # create tables if missing
    fetch_latest_table_data,                 # new version!
    get_alias_last_load,
    get_variable_status,                     # to inspect results
)

from reporting.quarterly_report.utils import RenderContext, BaseModule
from reporting.quarterly_report.report_utils.granting_utils import enrich_grants, _ensure_timedelta_cols, _coerce_date_columns
from reporting.quarterly_report.utils import Database, RenderContext

# ─────────────────────────────────────────────────────────────
# 2) open DB – change path if you work on a copy
# ─────────────────────────────────────────────────────────────
DB_PATH = Path("database/reporting.db")
init_db(db_path=DB_PATH)            # no-op if tables already exist

db = Database(str(DB_PATH))         # thin sqlite3 wrapper
conn = db.conn


# ──────────────────────────────────────────────────────────────
# constants – adapt whenever a file-alias changes
# ──────────────────────────────────────────────────────────────

PO_SUMMA_ALIAS = "c0_po_summa"
cutoff = pd.to_datetime("2025-04-15")

df_summa  = fetch_latest_table_data(conn, PO_SUMMA_ALIAS,   cutoff)



CALLS_TYPES_LIST = ['STG','ADG','POC','COG','SYG','StG','CoG','AdG','SyG', 'PoC']
PROGRAMMES_LIST = ['HORIZONEU_21_27', 'H2020_14_20']
FUND_SOURCES = ['VOBU', 'EARN/N', 'EFTA' , 'IAR2/2']

df_summa_filtered = df_summa[df_summa['Functional Area'].isin(PROGRAMMES_LIST)]
df_summa_filtered = df_summa_filtered[df_summa_filtered['Fund Source'].isin(FUND_SOURCES)]


DEBUG:root:Fetching latest data for table_alias: c0_po_summa, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query result for c0_po_summa: ('2025-05-13T07:10:54.298751', 6)
DEBUG:root:Selected upload_id: 6, uploaded_at: 2025-05-13T07:10:54.298751
DEBUG:root:Fetched 15927 rows from c0_po_summa


In [54]:
# Function to determine PO_CATEGORY based on the rules
def determine_po_category(row):
    po_category_desc = str(row.get('PO Category Desc', '')).strip()
    po_abac_sap_ref = str(row.get('PO ABAC SAP Reference', '')).strip()
    po_purchase_order_desc = str(row.get('PO Purchase Order Desc', '')).strip()

    if po_category_desc == 'Grant':
        # Check PO ABAC SAP Reference first
        if po_abac_sap_ref and any(call_type in po_abac_sap_ref for call_type in CALLS_TYPES_LIST):
            return next(call_type for call_type in CALLS_TYPES_LIST if call_type in po_abac_sap_ref).upper()
        # If empty or no match, check PO Purchase Order Desc
        elif po_purchase_order_desc and any(call_type in po_purchase_order_desc for call_type in CALLS_TYPES_LIST):
            return next(call_type for call_type in CALLS_TYPES_LIST if call_type in po_purchase_order_desc).upper()
        return ''  # Return empty if no match found
    elif po_category_desc in ['Direct Contract', 'Specific Contract']:
        return 'Experts'
    return ''  # Default value for other cases

# Ensure df_summa_filtered is a new DataFrame to avoid SettingWithCopyWarning
df_summa_filtered = df_summa_filtered.copy()

# Apply the function to create the new column using .loc
df_summa_filtered.loc[:, 'PO_CATEGORY'] = df_summa_filtered.apply(determine_po_category, axis=1)

# Define the mapping dictionary
programme_mapping = {
    'HORIZONEU_21_27': 'HE',
    'H2020_14_20': 'H2020'
}

# Create the new column 'Programme' based on 'Functional Area'
df_summa_filtered['Programme'] = df_summa_filtered['Functional Area'].map(programme_mapping).fillna('')



In [55]:
# Perform aggregation by PO Purchase Order Key
aggregated_df = df_summa_filtered.groupby('PO Purchase Order Key').agg({
    'PO Open Amount - RAL - Payments Made (PD Approved)': 'sum',  # Sum the numeric column
    'Programme': 'first',  # Take the first non-null value (assuming consistency)
    'PO_CATEGORY': 'first',  # Take the first non-null value (assuming consistency)
    'PO Final Date of Implementation (dd/mm/yyyy)': 'max'  # Take the maximum (latest) date
}).reset_index()

# Rename the aggregated column for clarity (optional)
aggregated_df = aggregated_df.rename(columns={
    'PO Open Amount - RAL - Payments Made (PD Approved)': 'Total_Open_Amount',
    'PO Final Date of Implementation (dd/mm/yyyy)': 'PO Final Date of Implementation'
})

# Filter to keep only rows where Total_Open_Amount > 0
aggregated_df = aggregated_df[aggregated_df['Total_Open_Amount'] > 0]

# Compute total commitments with RAL by PO_CATEGORY for the filtered data
total_ral_by_category = aggregated_df.groupby('PO_CATEGORY')['PO Purchase Order Key'].count().reset_index()
total_ral_by_category = total_ral_by_category.rename(columns={'PO Purchase Order Key': 'Total commitments with RAL'})

# Ensure Latest_Validity_Period_End is in datetime format after aggregation
aggregated_df['PO Final Date of Implementation'] = pd.to_datetime(
    aggregated_df['PO Final Date of Implementation'], 
    format='%Y-%m-%d %H:%M:%S',  # Match the format from the table
    errors='coerce'
)
# Verify the result
aggregated_df

Unnamed: 0,PO Purchase Order Key,Total_Open_Amount,Programme,PO_CATEGORY,PO Final Date of Implementation
0,4500008168,512903.30,HE,STG,2031-06-30
1,4500008179,851572.05,HE,ADG,2031-03-31
2,4500008180,1442383.75,HE,STG,2032-03-31
3,4500008210,870804.55,HE,ADG,2031-04-30
4,4500008212,523214.30,HE,STG,2031-07-31
...,...,...,...,...,...
7589,4500093527,1997169.00,HE,COG,2031-10-31
7590,4500093536,1990000.00,HE,COG,2031-10-31
7591,4500093589,150000.00,HE,POC,2028-08-31
7592,4500093803,2380750.00,HE,COG,2032-02-29


In [56]:
total_ral_by_category

Unnamed: 0,PO_CATEGORY,Total commitments with RAL
0,,3
1,ADG,1531
2,COG,2230
3,Experts,30
4,POC,597
5,STG,2977
6,SYG,208


In [58]:
# Filter to keep only rows where Latest_Validity_Period_End <= cutoff
aggregated_df = aggregated_df[
    aggregated_df['PO Final Date of Implementation'].notna() & 
    (aggregated_df['PO Final Date of Implementation'] <= cutoff)
]
# Compute the number of days elapsed from Latest_Validity_Period_End to cutoff
aggregated_df['Days_Elapsed_From_Cutoff'] = (cutoff - aggregated_df['PO Final Date of Implementation']).dt.days
aggregated_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aggregated_df['Days_Elapsed_From_Cutoff'] = (cutoff - aggregated_df['PO Final Date of Implementation']).dt.days


Unnamed: 0,PO Purchase Order Key,Total_Open_Amount,Programme,PO_CATEGORY,PO Final Date of Implementation,Days_Elapsed_From_Cutoff
4540,4500047266,30000.0,H2020,POC,2024-10-31,166
4672,4500047834,377451.59,H2020,COG,2025-03-31,15
4686,4500047908,105248.29,H2020,STG,2024-09-30,197
6032,4500054067,208174.7,H2020,COG,2024-10-31,166


In [59]:
# Categorize based on Days_Elapsed_From_Cutoff
def categorize_days(days):
    if 0 <= days <= 60:
        return "Within 2 months"
    elif 61 <= days <= 180:
        return "Between 2 and 6 months"
    elif days > 180:
        return "More than 6 months"
    else:
        return "Overdue"  # Should not occur due to the <= cutoff filter

aggregated_df['Category'] = aggregated_df['Days_Elapsed_From_Cutoff'].apply(categorize_days)

aggregated_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aggregated_df['Category'] = aggregated_df['Days_Elapsed_From_Cutoff'].apply(categorize_days)


Unnamed: 0,PO Purchase Order Key,Total_Open_Amount,Programme,PO_CATEGORY,PO Final Date of Implementation,Days_Elapsed_From_Cutoff,Category
4540,4500047266,30000.0,H2020,POC,2024-10-31,166,Between 2 and 6 months
4672,4500047834,377451.59,H2020,COG,2025-03-31,15,Within 2 months
4686,4500047908,105248.29,H2020,STG,2024-09-30,197,More than 6 months
6032,4500054067,208174.7,H2020,COG,2024-10-31,166,Between 2 and 6 months


In [64]:
# Aggregate by PO_CATEGORY
agg_result = aggregated_df.groupby(['Programme','PO_CATEGORY', 'Category']).agg({
    'PO Purchase Order Key': 'count',  # Count of items
   
}).reset_index()
# Merge the total commitments with RAL into the aggregated result

agg_result 

Unnamed: 0,Programme,PO_CATEGORY,Category,PO Purchase Order Key
0,H2020,COG,Between 2 and 6 months,1
1,H2020,COG,Within 2 months,1
2,H2020,POC,Between 2 and 6 months,1
3,H2020,STG,More than 6 months,1


In [66]:
# Pivot the agg_result to align with the table structure
pivot_result = agg_result.pivot_table(

    columns=['PO_CATEGORY','Programme','Category'],
    values=['Count', 'PO Purchase Order Key'],
    fill_value=0
).reset_index()

KeyError: 'Count'

In [None]:
# Pivot the agg_result to align with the table structure
pivot_result = agg_result.pivot_table(
    index='PO_CATEGORY',
    columns='Category',
    values=['Count', 'Total commitments with RAL'],
    fill_value=0
).reset_index()

# Flatten the multi-level columns
pivot_result.columns = ['_'.join(col).strip() for col in pivot_result.columns.values]
pivot_result = pivot_result.rename(columns={
    'PO_CATEGORY_': 'Type of Grant',
    'Count_Within 2 months': 'Within 2 months',
    'Count_Between 2 and 6 months': 'Between 2 and 6 months',
    'Count_More than 6 months': 'More than 6 months',
    'Count_Overdue': 'Total Overdue',
    'Total commitments with RAL_Within 2 months': 'Total commitments with RAL_Within 2 months',
    'Total commitments with RAL_Between 2 and 6 months': 'Total commitments with RAL_Between 2 and 6 months',
    'Total commitments with RAL_More than 6 months': 'Total commitments with RAL_More than 6 months',
    'Total commitments with RAL_Overdue': 'Total commitments with RAL_Overdue'
})

# Add Total Overdue and % of Overdue grants (should be 0% due to filter)
pivot_result['Total Overdue'] = 0  # No overdue since filtered to <= cutoff
pivot_result['% of Overdue/grants'] = 0.0  # 0% since no overdue

# Merge with total commitments with RAL by PO_CATEGORY
pivot_result = pivot_result.merge(total_ral_by_category, on='Type of Grant', how='left', suffixes=('', '_total'))
pivot_result = pivot_result.rename(columns={'Total commitments with RAL_total': 'Total commitments with RAL'})

# Calculate totals
total_counts = pivot_result[['Within 2 months', 'Between 2 and 6 months', 'More than 6 months', 'Total Overdue']].sum()
total_ral = pivot_result['Total commitments with RAL'].sum()
pivot_result.loc[len(pivot_result)] = ['Total', total_counts['Within 2 months'], total_counts['Between 2 and 6 months'], 
                                      total_counts['More than 6 months'], total_counts['Total Overdue'], 0.0, total_ral]