In [1]:
# ─────────────────────────────────────────────────────────────
# 0) one-off: hot-reload the local packages
# ─────────────────────────────────────────────────────────────
%load_ext autoreload
%autoreload 2


In [2]:
# ─────────────────────────────────────────────────────────────
# 1) imports & helpers
# ─────────────────────────────────────────────────────────────
import pandas as pd
from pathlib import Path
from datetime import datetime

# our project
from ingestion.db_utils import (
    init_db,                                 # create tables if missing
    fetch_latest_table_data,                 # new version!
    get_alias_last_load,
    get_variable_status,                     # to inspect results
)
from reporting.quarterly_report.utils import Database, RenderContext
from reporting.quarterly_report.modules.granting import GrantsModule


  pd.date_range("2000-01-01", periods=12, freq="M").strftime("%B")


In [3]:
# ─────────────────────────────────────────────────────────────
# 2) open DB – change path if you work on a copy
# ─────────────────────────────────────────────────────────────
# Relative path: it is resolved against the kernel's current working
# directory, so start the notebook from the project root (or edit the path).
DB_PATH = Path("database/reporting.db")
init_db(db_path=DB_PATH)            # no-op if tables already exist

# `conn` is the handle the later cells pass to fetch_latest_table_data(...).
# NOTE(review): presumably a raw sqlite3 connection exposed by the wrapper – confirm.
db = Database(str(DB_PATH))         # thin sqlite3 wrapper
conn = db.conn


In [20]:
# reporting/quarterly_report/modules/granting.py
from __future__ import annotations

import logging, sqlite3, datetime
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
from great_tables import GT, loc, style, html

from ingestion.db_utils import (
    fetch_latest_table_data,
    insert_variable,
)
from reporting.quarterly_report.utils import RenderContext, BaseModule
from reporting.quarterly_report.report_utils.granting_utils import enrich_grants, _ensure_timedelta_cols, _coerce_date_columns


# ──────────────────────────────────────────────────────────────
# constants – adapt whenever a file-alias changes
# ──────────────────────────────────────────────────────────────
# Table aliases handed to fetch_latest_table_data(...).
CALL_OVERVIEW_ALIAS   = "call_overview"
BUDGET_FOLLOWUP_ALIAS = "budget_follow_up_report"
ETHICS_ALIAS          = "ethics_requirements_and_issues"

# Topics removed from the commitment tables.
EXCLUDE_TOPICS        = [
    "ERC-2023-SJI-1", "ERC-2023-SJI",
    "ERC-2024-PERA",
    "HORIZON-ERC-2022-VICECHAIRS-IBA",
    "HORIZON-ERC-2023-VICECHAIRS-IBA",
    "ERC-2025-NCPS-IBA"
]

# English month names in calendar order.
# month_name() always yields English names (strftime("%B") follows the process
# locale), and the "MS" alias works on every pandas version, whereas "ME" only
# exists from pandas 2.2 onwards.
MONTHS_ORDER = (
    pd.date_range("2000-01-01", periods=12, freq="MS").month_name().tolist()
)


SIGNED_STATI = {"SIGNED", "SUSPENDED", "TERMINATED", "CLOSED"}

# Reporting cut-off used by every fetch and month filter in this notebook.
cutoff = pd.to_datetime("2025-04-15")
# ---------------------------------------------------------------------------
# little helper – converts a DataFrame → Great-Tables with very plain style
# ---------------------------------------------------------------------------
def _df_to_gt(df: pd.DataFrame, title: str) -> GT:
    """Build a minimally styled Great-Tables object from *df*.

    The frame's index is discarded (reset to a fresh RangeIndex), *title*
    becomes the table header, the table font is set to Arial and the column
    labels are rendered in bold.
    """
    bold_labels = [style.text(weight="bold")]

    table = GT(df.reset_index(drop=True))
    table = table.tab_header(title)
    table = table.opt_table_font(font="Arial")
    return table.tab_style(style=bold_labels, locations=loc.column_labels())

def months_in_scope(cutoff: pd.Timestamp) -> list[str]:
    """
    Return the English month names from January up to the **last month that
    ended *before* the cut-off month** (same calendar year as the cut-off).

    • cut-off 15-Apr-2025 → Jan Feb Mar
    • cut-off  1-May-2025 → Jan … Apr
    • cut-off in January  → empty list (no month has completed yet)

    Parameters
    ----------
    cutoff
        Reporting cut-off. Anything ``pd.Timestamp`` accepts is tolerated;
        tz-aware values are fine because only the year and month are used.

    Returns
    -------
    list[str]
        Month names produced by ``month_name()``, i.e. always English, so they
        match ``Series.dt.month_name()`` lookups regardless of the process
        locale (``strftime("%B")`` would follow the locale).
    """
    cutoff = pd.Timestamp(cutoff)

    # Months 1 … (cutoff.month - 1) are the fully elapsed ones; periods=0
    # yields an empty range for a January cut-off.
    completed = pd.date_range(
        start=pd.Timestamp(year=cutoff.year, month=1, day=1),
        periods=cutoff.month - 1,
        freq="MS",
    )
    return completed.month_name().tolist()


# ────────────────────────────────────────────────────────────
# 1) fetch the three source tables
# ────────────────────────────────────────────────────────────
call_overview  = fetch_latest_table_data(conn, CALL_OVERVIEW_ALIAS,   cutoff)
budget_follow  = fetch_latest_table_data(conn, BUDGET_FOLLOWUP_ALIAS, cutoff)
ethics_df      = fetch_latest_table_data(conn, ETHICS_ALIAS,          cutoff)

# Fail early with an actionable message if any source table is empty.
for df, alias in [
    (call_overview, CALL_OVERVIEW_ALIAS),
    (budget_follow, BUDGET_FOLLOWUP_ALIAS),
    (ethics_df, ETHICS_ALIAS),
]:
    if df.empty:
        raise RuntimeError(
            f"GAP module: no rows found for alias '{alias}'. "
            "Upload data first (Single / Mass upload)."
        )

# ────────────────────────────────────────────────────────────
# 2) merge & clean
# ────────────────────────────────────────────────────────────
# One row per grant: after the join, keep the last duplicate of each
# "Grant Number" and index the frame by it.
df1 = (
    call_overview
    .merge(budget_follow, left_on="Grant Number", right_on="Project Number")
    .reset_index()
    .drop_duplicates(subset="Grant Number", keep="last")
    .set_index("Grant Number")
    .sort_index()
)

# "Grant Number" is the index level of df1 here; merge resolves it by name.
df_grants = df1.merge(
    ethics_df,
    left_on="Grant Number", right_on="PROPOSAL\nNUMBER", how="inner"
)

COLS_TO_DROP: List[str] = []          #  ← fill when you know them
df_grants = df_grants.drop(
    columns=[c for c in COLS_TO_DROP if c in df_grants.columns],
    errors="ignore",
)

_ensure_timedelta_cols(df_grants)
df_grants = enrich_grants(df_grants)
# make sure every date column really **is** datetime
_coerce_date_columns(df_grants)

# ────────────────────────────────────────────────────────────
# 3) commitments (EUR amounts) per month and topic
# ────────────────────────────────────────────────────────────
scope_months = months_in_scope(cutoff)

# Commitments visa'd in the cut-off year, in one of the completed months.
in_scope = (
    df_grants["Commitment AO visa"].dt.year.eq(cutoff.year)
    & df_grants["Commitment AO visa"].dt.month_name().isin(scope_months)
)

committed = df_grants.loc[in_scope].copy()
committed = committed[~committed["Topic"].isin(EXCLUDE_TOPICS)]

tab3_commit = (
    committed.pivot_table(
        index=committed["Commitment AO visa"].dt.month_name(),
        columns="Topic",
        values="Eu contribution",
        aggfunc="sum",
        fill_value=0,
    )
    # Jan-… only those in scope. fill_value=0 keeps months without any
    # commitment as zero rows; a plain reindex would insert NaN rows, which
    # the pivot's own fill_value does not cover.
    .reindex(scope_months, fill_value=0)
    .reset_index(names="Commitment Month")
)

# Column layout from here on: ["Commitment Month", <topics…>, "TOTAL", "Quarter"].
# The positional slices below (1:-1 = topics + TOTAL, 1:-2 = topics) rely on it.
tab3_commit["TOTAL"] = tab3_commit.iloc[:, 1:].sum(axis=1)

# Define a mapping of months to quarters
month_to_quarter = {
    "January": 1, "February": 1, "March": 1,
    "April": 2, "May": 2, "June": 2,
    "July": 3, "August": 3, "September": 3,
    "October": 4, "November": 4, "December": 4
}

# Add a quarter column to tab3_commit
tab3_commit["Quarter"] = tab3_commit["Commitment Month"].map(month_to_quarter)

# Quarter that contains the cut-off date (e.g. 15-Apr-2025 → 2).
# NOTE(review): this is derived from the cut-off itself, not from the last
# in-scope month, so a cut-off in July or October folds every in-scope quarter
# into a "Quarter N" row – confirm that this is intended.
current_quarter = (cutoff.month - 1) // 3 + 1

# Prepare final DataFrame with conditional quarterly aggregation
if not tab3_commit.empty:
    final_rows = []

    # Check if the data contains exactly three months
    unique_months = tab3_commit["Commitment Month"].nunique()
    max_quarter = tab3_commit["Quarter"].max()

    if unique_months == 3 and max_quarter == 1:
        # Special case: exactly three months, all in Quarter 1, show individually
        final_rows.append(tab3_commit.drop(columns=["Quarter"]))
    else:
        # General case: aggregate previous quarters, show current quarter months individually
        for quarter in sorted(tab3_commit["Quarter"].unique()):
            quarter_data = tab3_commit[tab3_commit["Quarter"] == quarter].copy()

            if quarter < current_quarter:
                # Aggregate previous quarters into a single row
                quarter_sum = quarter_data.iloc[:, 1:-1].sum(numeric_only=True)
                quarter_row = pd.DataFrame({
                    "Commitment Month": [f"Quarter {quarter}"],
                    **{col: [quarter_sum[col]] for col in quarter_data.columns[1:-2]},  # Topics
                    "TOTAL": [quarter_sum["TOTAL"]]
                })
                final_rows.append(quarter_row)
            else:
                # Keep individual months for the current quarter
                quarter_data = quarter_data.drop(columns=["Quarter"])
                final_rows.append(quarter_data)

    # Grand Total: the column sums already cover every topic and TOTAL.
    col_totals = pd.DataFrame(tab3_commit.iloc[:, 1:-1].sum(), columns=["Grand Total"]).T
    col_totals.insert(0, "Commitment Month", "Grand Total")

    # Combine all rows
    agg_with_totals = pd.concat(final_rows + [col_totals], ignore_index=True)
else:
    # Same schema as the non-empty branch: drop the helper "Quarter" column and
    # work on a new frame so that tab3_commit itself is not modified below.
    agg_with_totals = tab3_commit.drop(columns=["Quarter"])

agg_with_totals['Type'] = 'amounts'

DEBUG:root:Fetching latest data for table_alias: call_overview, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query result for call_overview: ('2025-05-13T07:05:38.084275', 1)
DEBUG:root:Selected upload_id: 1, uploaded_at: 2025-05-13T07:05:38.084275


DEBUG:root:Fetched 13295 rows from call_overview
DEBUG:root:Fetching latest data for table_alias: budget_follow_up_report, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query result for budget_follow_up_report: ('2025-05-13T07:06:27.613717', 2)
DEBUG:root:Selected upload_id: 2, uploaded_at: 2025-05-13T07:06:27.613717
DEBUG:root:Fetched 16470 rows from budget_follow_up_report
DEBUG:root:Fetching latest data for table_alias: ethics_requirements_and_issues, cutoff: 2025-04-15T00:00:00
DEBUG:root:Upload log query result for ethics_requirements_and_issues: ('2025-05-13T11:33:55.745081', 7)
DEBUG:root:Selected upload_id: 7, uploaded_at: 2025-05-13T11:33:55.745081
DEBUG:root:Fetched 88889 rows from ethics_requirements_and_issues


In [23]:
# Number of commitments per month and topic (same layout as the amounts table).
tab3_commit_n = (
    committed.pivot_table(
        index=committed["Commitment AO visa"].dt.month_name(),
        columns="Topic",
        values="Eu contribution",
        aggfunc="count",
        fill_value=0,
    )
    # Jan-… only those in scope. fill_value=0 keeps months without any
    # commitment as zero rows; a plain reindex would insert NaN rows, which
    # the pivot's own fill_value does not cover.
    .reindex(scope_months, fill_value=0)
    .reset_index(names="Commitment Month")
)

# Column layout from here on: ["Commitment Month", <topics…>, "TOTAL", "Quarter"].
# The positional slices below (1:-1 = topics + TOTAL, 1:-2 = topics) rely on it.
tab3_commit_n["TOTAL"] = tab3_commit_n.iloc[:, 1:].sum(axis=1)

# Add a quarter column to tab3_commit_n.
# `month_to_quarter` and `current_quarter` are the ones defined in the amounts
# cell above; they depend only on the calendar and on `cutoff`, so they are not
# redefined here.
tab3_commit_n["Quarter"] = tab3_commit_n["Commitment Month"].map(month_to_quarter)

# Prepare final DataFrame with conditional quarterly aggregation
if not tab3_commit_n.empty:
    final_rows = []

    # Check if the data contains exactly three months
    unique_months = tab3_commit_n["Commitment Month"].nunique()
    max_quarter = tab3_commit_n["Quarter"].max()

    if unique_months == 3 and max_quarter == 1:
        # Special case: exactly three months, all in Quarter 1, show individually
        final_rows.append(tab3_commit_n.drop(columns=["Quarter"]))
    else:
        # General case: aggregate previous quarters, show current quarter months individually
        for quarter in sorted(tab3_commit_n["Quarter"].unique()):
            quarter_data = tab3_commit_n[tab3_commit_n["Quarter"] == quarter].copy()

            if quarter < current_quarter:
                # Aggregate previous quarters into a single row
                quarter_sum = quarter_data.iloc[:, 1:-1].sum(numeric_only=True)
                quarter_row = pd.DataFrame({
                    "Commitment Month": [f"Quarter {quarter}"],
                    **{col: [quarter_sum[col]] for col in quarter_data.columns[1:-2]},  # Topics
                    "TOTAL": [quarter_sum["TOTAL"]]
                })
                final_rows.append(quarter_row)
            else:
                # Keep individual months for the current quarter
                quarter_data = quarter_data.drop(columns=["Quarter"])
                final_rows.append(quarter_data)

    # Grand Total: the column sums already cover every topic and TOTAL.
    col_totals = pd.DataFrame(tab3_commit_n.iloc[:, 1:-1].sum(), columns=["Grand Total"]).T
    col_totals.insert(0, "Commitment Month", "Grand Total")

    # Combine all rows
    agg_with_totals_n = pd.concat(final_rows + [col_totals], ignore_index=True)
else:
    # Same schema as the non-empty branch: drop the helper "Quarter" column and
    # work on a new frame so that tab3_commit_n itself is not modified below.
    agg_with_totals_n = tab3_commit_n.drop(columns=["Quarter"])

agg_with_totals_n['Type'] = 'numbers'

In [24]:
# Stack the "amounts" rows on top of the "numbers" rows into one frame with a
# fresh 0..n-1 index; the "Type" column tells the two groups apart.
final_agg_table = pd.concat(
    [agg_with_totals, agg_with_totals_n],
    ignore_index=True,
)
final_agg_table

Topic,Commitment Month,ERC-2024-COG,ERC-2024-POC,ERC-2024-STG,ERC-2024-SyG,TOTAL,Type
0,January,0.0,0.0,5233784.0,0.0,5233784.0,amounts
1,February,279950000.0,8700000.0,56614917.0,241431000.0,586695900.0,amounts
2,March,126325900.0,5400000.0,10972387.0,102268500.0,244966800.0,amounts
3,Grand Total,406275900.0,14100000.0,72821088.0,343699500.0,836896400.0,amounts
4,January,0.0,0.0,3.0,0.0,3.0,numbers
5,February,136.0,58.0,35.0,24.0,253.0,numbers
6,March,62.0,36.0,7.0,10.0,115.0,numbers
7,Grand Total,198.0,94.0,45.0,34.0,371.0,numbers


In [37]:
# Define colors
BLUE        = "#004A99"
LIGHT_BLUE = "#d6e6f4"
GRID_CLR    = "#004A99"
DARK_BLUE   = "#01244B"
DARK_GREY =   '#242425'

# Value columns shown in the table body: everything except the first column
# ("Commitment Month", used as the stub) and the last one ("Type", used as the
# row-group label).
display_columns = final_agg_table.columns[1:-1].tolist()

# Create the great table
if not final_agg_table.empty:
    # Row selectors, computed once and reused by the formatters / styles below.
    amount_rows = final_agg_table.index[final_agg_table["Type"] == "amounts"].tolist()
    number_rows = final_agg_table.index[final_agg_table["Type"] == "numbers"].tolist()
    grand_total_rows = final_agg_table.index[
        final_agg_table["Commitment Month"] == "Grand Total"
    ].tolist()

    # Shared look of the highlighted "Grand Total" rows (body cells and stub).
    grand_total_style = [style.fill(color="#D3D3D3"), style.text(color="black", weight="bold")]

    tbl = (
        GT(
            final_agg_table,
            rowname_col="Commitment Month",
            groupname_col="Type"
        )
        .tab_header(
            title="HE Commitment Activity"
        )

        # "amounts" group: accounting-style numbers with 2 decimals and separators
        .fmt_number(
            columns=display_columns,
            rows=amount_rows,
            accounting=True,
            decimals=2,
            use_seps=True
        )
        # "numbers" group: integers
        .fmt_number(
            columns=display_columns,
            rows=number_rows,
            decimals=0,
            use_seps=True
        )
        .tab_style(
            style.text(color=DARK_BLUE, weight="bold", align="center", font='Arial'),
            locations=loc.header()
        )
        .tab_stubhead(label="Commitment Month")
        .tab_style(
            style=[
                style.text(color=DARK_BLUE, weight="bold", font='Arial', size='medium'),
                style.fill(color=LIGHT_BLUE),
                style.css(f"border-bottom: 2px solid {DARK_BLUE}; border-right: 2px solid {DARK_BLUE}; border-top: 2px solid {DARK_BLUE}; border-left: 2px solid {DARK_BLUE};"),
                style.css("max-width:200px; line-height:1.2"),
            ],
            locations=loc.row_groups()
        )

        .opt_table_font(font="Arial")
        .tab_style(
            style=[
                style.fill(color=BLUE),
                style.text(color="white", weight="bold", align="center", size='small'),
                style.css("max-width:200px; line-height:1.2")
            ],
            locations=loc.column_labels()
        )
        .tab_style(
            style=[
                style.fill(color=BLUE),
                style.text(color="white", weight="bold", align="center",  size='small'),
                style.css("text-align: center; vertical-align: middle; max-width:200px; line-height:1.2")
            ],
            locations=loc.stubhead()
        )
        .tab_style(
            style=[style.borders(weight="1px", color=DARK_BLUE),
                   style.text( size='small')],
            locations=loc.stub()
        )
        .tab_style(
            style=[style.borders(sides="all", color=DARK_BLUE, weight="1px"),
                   style.text( align="center",  size='small')],
            locations=loc.body()
        )
        .tab_style(
            style=style.borders(color=DARK_BLUE, weight="2px"),
            locations=[loc.column_labels(), loc.stubhead()]
        )
        # Highlight the "Grand Total" rows in both the body and the stub.
        .tab_style(
            style=grand_total_style,
            locations=loc.body(rows=grand_total_rows)
        )
        .tab_style(
            style=grand_total_style,
            locations=loc.stub(rows=grand_total_rows)
        )
        .tab_options(
            table_body_border_bottom_color=DARK_BLUE,
            table_body_border_bottom_width="2px",
            table_border_right_color=DARK_BLUE,
            table_border_right_width="2px",
            table_border_left_color=DARK_BLUE,
            table_border_left_width="2px",
            table_border_top_color=DARK_BLUE,
            table_border_top_width="2px",
            column_labels_border_top_color=DARK_BLUE,
            column_labels_border_top_width="2px"
        )
        .tab_source_note("Source: Compass")
        .tab_source_note("Reports : Call Overview Report - Budget Follow-Up Report - Ethics Requirements and Issues " )
        .tab_style(
                    style=[ style.text(size="small")],
                    locations=loc.footer()
                )

    )
    # No bare `tbl` here: an expression nested inside an `if` block is never
    # rendered by Jupyter, only a cell's last top-level expression is.
else:
    # Keep `tbl` defined so that a later cell evaluating it shows nothing
    # instead of raising NameError.
    tbl = None
    print("No data to display.")

In [38]:
 # Render the styled commitment table as this cell's output.
 tbl

HE Commitment Activity,HE Commitment Activity,HE Commitment Activity,HE Commitment Activity,HE Commitment Activity,HE Commitment Activity
Commitment Month,ERC-2024-COG,ERC-2024-POC,ERC-2024-STG,ERC-2024-SyG,TOTAL
amounts,amounts,amounts,amounts,amounts,amounts
January,0.00,0.00,5233784.00,0.00,5233784.00
February,279949975.25,8700000.00,56614917.00,241430992.00,586695884.25
March,126325900.50,5400000.00,10972387.00,102268474.75,244966762.25
Grand Total,406275875.75,14100000.00,72821088.00,343699466.75,836896430.50
numbers,numbers,numbers,numbers,numbers,numbers
January,0,0,3,0,3
February,136,58,35,24,253
March,62,36,7,10,115
Grand Total,198,94,45,34,371
