In [None]:
import sqlite3
import json
import os
import logging
import pandas as pd
from datetime import datetime
from io import BytesIO
from great_tables import GT, loc, style, px, html
from great_tables.data import islands
from pathlib import Path
from typing import Any

# Fetch latest table data
def fetch_latest_table_data(conn: sqlite3.Connection, table_alias: str, cutoff: pd.Timestamp) -> pd.DataFrame:
    """
    Load the rows of ``table_alias`` that belong to the upload closest to ``cutoff``.

    The upload is picked from ``upload_log`` by the smallest absolute distance
    (in seconds) between ``uploaded_at`` and ``cutoff``, so the chosen upload
    may lie before *or* after the cutoff.

    Parameters
    ----------
    conn
        Open SQLite connection; it is not closed here.
    table_alias
        Value matched against ``upload_log.table_alias`` and also used as the
        name of the data table to read.
    cutoff
        Reference moment; its ISO-8601 form is passed to SQLite.

    Returns
    -------
    pd.DataFrame
        Every row of the data table whose ``upload_id`` equals the chosen
        upload's ``id``.

    Raises
    ------
    ValueError
        If ``upload_log`` contains no row for ``table_alias``.
    """
    cutoff_str = cutoff.isoformat()
    query = """
        SELECT id
        FROM upload_log
        WHERE table_alias = ?
        ORDER BY ABS(strftime('%s', uploaded_at) - strftime('%s', ?))
        LIMIT 1
    """
    row = conn.execute(query, (table_alias, cutoff_str)).fetchone()
    if row is None:
        raise ValueError(f"No uploads found for table alias '{table_alias}' near cutoff {cutoff_str}")
    upload_id = row[0]

    # A table name cannot be bound as a parameter, so it is quoted as an SQL
    # identifier (embedded double quotes doubled) instead of pasted in raw.
    # NOTE(review): a schema-qualified alias such as "main.tbl" is now treated
    # as one literal table name - confirm no caller relies on that form.
    quoted_table = '"' + table_alias.replace('"', '""') + '"'
    return pd.read_sql_query(
        f"SELECT * FROM {quoted_table} WHERE upload_id = ?",
        conn,
        params=(upload_id,),
    )

# Main execution
# NOTE(review): db_path and cutoff are not assigned in this cell; presumably
# they come from a parameters cell executed earlier - confirm the run order.
conn = None  # pre-bound so ``finally`` cannot raise NameError when connecting fails
try:
    conn = sqlite3.connect(db_path)
    df = fetch_latest_table_data(conn, "c0_budgetary_execution_details", cutoff)
except Exception as e:
    print("Error:", e)
    # logging.exception records the traceback together with the message.
    logging.exception("Main execution error: %s", e)
finally:
    if conn is not None:
        conn.close()

# Last expression of the cell: shows the loaded column names.
df.columns

In [None]:
import sqlite3
import json
import os
import logging
import pandas as pd
from datetime import datetime
from io import BytesIO
from great_tables import GT, loc, style, px, html
from great_tables.data import islands
from pathlib import Path
from typing import Any


# --- Run parameters ----------------------------------------------------------
report = "Quarterly_Report"              # stored as report_name in report_variables
db_path = "database/reporting.db"        # SQLite database file
current_year = 2025                      # compared against the "Budget Period" column
cutoff = pd.to_datetime("2025-04-15")    # the upload closest to this moment is loaded

# --- Logging: emit everything from DEBUG upwards -------------------------------
logging.basicConfig(level=logging.DEBUG)

def insert_variable(
    report: str,
    module: str,
    var: str,
    value: Any,
    db_path: str,
    anchor: str | None = None,
    gt_table=None,
) -> None:
    """
    Overwrite the row (report_name, var_name) with a new value (and picture).

    Exactly ONE row per variable is kept.
    """
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    try:
        # 1) remove any previous copy of this variable
        cur.execute(
            "DELETE FROM report_variables WHERE report_name = ? AND var_name = ?",
            (report, var),
        )

        # 2) serialise the Python value
        val_json = json.dumps(value, default=str)

        # 3) optional: render great‑tables object to PNG → bytes
        gt_image = None
        if gt_table is not None:
            tmp = Path(f"__gt_{var}.png")
            gt_table.save(tmp)                 # playwright renders PNG
            gt_image = tmp.read_bytes()
            tmp.unlink(missing_ok=True)

        # 4) insert the fresh row
        cur.execute(
            """
            INSERT INTO report_variables
                  (report_name, module_name, var_name,
                   anchor_name, value, gt_image, created_at)
            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
            """,
            (report, module, var, anchor or var, val_json, gt_image),
        )

        con.commit()
        logging.debug("Stored variable %s for report %s (rowid=%s)",
                      var, report, cur.lastrowid)

    except Exception as exc:
        con.rollback()
        logging.error("insert_variable failed for %s/%s: %s", report, var, exc)
        raise
    finally:
        con.close()


# Fetch latest table data
def fetch_latest_table_data(conn: sqlite3.Connection, table_alias: str, cutoff: pd.Timestamp) -> pd.DataFrame:
    """
    Load the rows of ``table_alias`` that belong to the upload closest to ``cutoff``.

    The upload is picked from ``upload_log`` by the smallest absolute distance
    (in seconds) between ``uploaded_at`` and ``cutoff``, so the chosen upload
    may lie before *or* after the cutoff.

    Parameters
    ----------
    conn
        Open SQLite connection; it is not closed here.
    table_alias
        Value matched against ``upload_log.table_alias`` and also used as the
        name of the data table to read.
    cutoff
        Reference moment; its ISO-8601 form is passed to SQLite.

    Returns
    -------
    pd.DataFrame
        Every row of the data table whose ``upload_id`` equals the chosen
        upload's ``id``.

    Raises
    ------
    ValueError
        If ``upload_log`` contains no row for ``table_alias``.
    """
    cutoff_str = cutoff.isoformat()
    query = """
        SELECT id
        FROM upload_log
        WHERE table_alias = ?
        ORDER BY ABS(strftime('%s', uploaded_at) - strftime('%s', ?))
        LIMIT 1
    """
    row = conn.execute(query, (table_alias, cutoff_str)).fetchone()
    if row is None:
        raise ValueError(f"No uploads found for table alias '{table_alias}' near cutoff {cutoff_str}")
    upload_id = row[0]

    # A table name cannot be bound as a parameter, so it is quoted as an SQL
    # identifier (embedded double quotes doubled) instead of pasted in raw.
    # NOTE(review): a schema-qualified alias such as "main.tbl" is now treated
    # as one literal table name - confirm no caller relies on that form.
    quoted_table = '"' + table_alias.replace('"', '""') + '"'
    return pd.read_sql_query(
        f"SELECT * FROM {quoted_table} WHERE upload_id = ?",
        conn,
        params=(upload_id,),
    )

# Build commitment summary table
def build_commitment_summary_table(df: pd.DataFrame, current_year: int, report: str, db_path: str) -> "tuple[pd.DataFrame, GT]":
    """
    Build, render and store the commitment-appropriation summary for programme HE.

    Rows are restricted to ``Budget Period == current_year`` and fund sources
    VOBU / EFTA, summed per programme, and then narrowed to the "HE" row. The
    result is stored through ``insert_variable`` under the variable name
    ``table_1a_commitment_summary`` (anchor ``table_1a``).

    Parameters
    ----------
    df
        Source rows; must contain the columns "Budget Period", "Fund Source",
        "Functional Area Desc", "Commitment Appropriation", "Committed Amount"
        and "Commitment Available " (with the trailing space).
    current_year
        Budget period to keep.
    report
        Report name forwarded to ``insert_variable``.
    db_path
        Database path forwarded to ``insert_variable``.

    Returns
    -------
    tuple
        ``(agg, tbl)``: the aggregated DataFrame and the great_tables object
        built from it.
    """
    selected = (df["Budget Period"] == current_year) & df["Fund Source"].isin(["VOBU", "EFTA"])
    # Work on an explicit copy so adding "Programme" below is not an
    # assignment on a slice of the caller's frame (SettingWithCopyWarning).
    df = df.loc[selected].copy()
    df["Programme"] = df["Functional Area Desc"].replace({
        "HORIZONEU_21_27": "HE",
        "H2020_14_20": "H2020"
    })

    agg = df.groupby("Programme")[
        ["Commitment Appropriation", "Committed Amount", "Commitment Available "]
    ].sum().reset_index()
    agg["%"] = agg["Committed Amount"] / agg["Commitment Appropriation"]
    agg = agg.rename(columns={
        "Commitment Appropriation": "Available_Commitment_Appropriations",
        "Committed Amount": "L1_Commitment",
        "Commitment Available ": "RAL_on_Appropriation",
        "%": "ratio_consumed_of_L1_and_L2_against_Commitment_Appropriations"
    })
    # Only the Horizon Europe row is reported in this table.
    agg = agg.loc[agg["Programme"] == "HE"]

    BLUE = "#004A99"
    DARK_BLUE = "#01244B"

    tbl = (
        GT(agg)
        .tab_stubhead("Programme")
        # ── formats ────────────────────────────────────────────────────────────
        .fmt_number(columns=[
            "Available_Commitment_Appropriations",
            "L1_Commitment",
            "RAL_on_Appropriation"
        ], accounting=True, decimals=2)
        .fmt_percent(
            columns="ratio_consumed_of_L1_and_L2_against_Commitment_Appropriations",
            decimals=2
        )
        # ── Set custom column labels with <br> for line breaks ─────────────────
        .cols_label(
            Available_Commitment_Appropriations=html("Available Commitment Appropriations<br>(1)"),
            L1_Commitment=html("L1 Commitment<br>(2)"),
            RAL_on_Appropriation=html("RAL on Appropriation<br>(3)=(1)-(2)"),
            ratio_consumed_of_L1_and_L2_against_Commitment_Appropriations=html("% consumed of L1 and L2<br>against Commitment Appropriations <br> (4) = (2)/(1)")
        )
        # ── Arial everywhere ──────────────────────────────────────────────────
        .opt_table_font(font="Arial")
        # ── HEADER + STUB COLOUR ──────────────────────────────────────────────
        .tab_style(
            style=[
                style.fill(color=BLUE),
                style.text(color="white", weight="bold", align='center'),
                style.css("word-wrap: break-word; white-space: normal; max-width: 200px;"),
                style.css("line-height: 1.2; margin-bottom: 5px;")
            ],
            locations=loc.column_labels()
        )
        .tab_style(
            style=[
                style.fill(color=BLUE),
                style.text(color="white", weight="bold"),
            ],
            locations=loc.stubhead()
        )
        # ── GRID LINES ────────────────────────────────────────────────────────
        .tab_style(
            style=style.borders(sides=["all"], color=DARK_BLUE, weight='2px'),
            locations=loc.body()
        )
        .tab_style(
            style=style.borders(color=DARK_BLUE, weight='2px'),
            locations=loc.column_labels()
        )
        .tab_style(
            style=style.borders(color=DARK_BLUE, weight='2px'),
            locations=loc.stubhead()
        )
        # ── BODY BOTTOM BORDER ────────────────────────────────────────────────
        .tab_options(table_body_border_bottom_color=DARK_BLUE, table_body_border_bottom_width='2px')
        # ── SOURCE NOTE ────────────────────────────────────────────────────────
        .tab_source_note("Source: Summa DataWarehouse")
        .tab_source_note("BO Report: C0_Budgetary_Execution_Details")
    )

    insert_variable(
        report=report,
        module="BudgetModule",
        var="table_1a_commitment_summary",
        value=agg.to_dict(orient="records"),
        db_path=db_path,
        anchor="table_1a",
        gt_table=tbl
    )
    return agg, tbl

# Main execution
conn = None  # pre-bound so ``finally`` cannot raise NameError when connecting fails
try:
    conn = sqlite3.connect(db_path)
    df_comm = fetch_latest_table_data(conn, "c0_budgetary_execution_details", cutoff)
    agg, tbl = build_commitment_summary_table(df_comm, current_year, report, db_path)
except Exception as e:
    print("Error:", e)
    # logging.exception records the traceback together with the message.
    logging.exception("Main execution error: %s", e)
finally:
    if conn is not None:
        conn.close()

In [None]:
import sqlite3
import json
import os
import logging
import pandas as pd
from datetime import datetime
from io import BytesIO
from great_tables import GT, loc, style, px, html
from great_tables.data import islands
from pathlib import Path
from typing import Any


# --- Colour palette used by the table styling ------------------------------------
DARK_GREY = '#242425'
DARK_BLUE = "#01244B"
GRID_CLR = "#004A99"
LIGHT_BLUE = "#e6eff9"
BLUE = "#004A99"


# --- Run parameters ----------------------------------------------------------
report = "Quarterly_Report"              # stored as report_name in report_variables
db_path = "database/reporting.db"        # SQLite database file
current_year = 2025                      # compared against the "Budget Period" column
cutoff = pd.to_datetime("2025-04-15")    # the upload closest to this moment is loaded

# --- Logging: emit everything from DEBUG upwards -------------------------------
logging.basicConfig(level=logging.DEBUG)

def insert_variable(
    report: str,
    module: str,
    var: str,
    value: Any,
    db_path: str,
    anchor: str | None = None,
    gt_table=None,
) -> None:
    """
    Overwrite the row (report_name, var_name) with a new value (and picture).

    Exactly ONE row per variable is kept.
    """
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    try:
        # 1) remove any previous copy of this variable
        cur.execute(
            "DELETE FROM report_variables WHERE report_name = ? AND var_name = ?",
            (report, var),
        )

        # 2) serialise the Python value
        val_json = json.dumps(value, default=str)

        # 3) optional: render great‑tables object to PNG → bytes
        gt_image = None
        if gt_table is not None:
            tmp = Path(f"__gt_{var}.png")
            gt_table.save(tmp)                 # playwright renders PNG
            gt_image = tmp.read_bytes()
            tmp.unlink(missing_ok=True)

        # 4) insert the fresh row
        cur.execute(
            """
            INSERT INTO report_variables
                  (report_name, module_name, var_name,
                   anchor_name, value, gt_image, created_at)
            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
            """,
            (report, module, var, anchor or var, val_json, gt_image),
        )

        con.commit()
        logging.debug("Stored variable %s for report %s (rowid=%s)",
                      var, report, cur.lastrowid)

    except Exception as exc:
        con.rollback()
        logging.error("insert_variable failed for %s/%s: %s", report, var, exc)
        raise
    finally:
        con.close()


# Fetch latest table data
def fetch_latest_table_data(conn: sqlite3.Connection, table_alias: str, cutoff: pd.Timestamp) -> pd.DataFrame:
    """
    Load the rows of ``table_alias`` that belong to the upload closest to ``cutoff``.

    The upload is picked from ``upload_log`` by the smallest absolute distance
    (in seconds) between ``uploaded_at`` and ``cutoff``, so the chosen upload
    may lie before *or* after the cutoff.

    Parameters
    ----------
    conn
        Open SQLite connection; it is not closed here.
    table_alias
        Value matched against ``upload_log.table_alias`` and also used as the
        name of the data table to read.
    cutoff
        Reference moment; its ISO-8601 form is passed to SQLite.

    Returns
    -------
    pd.DataFrame
        Every row of the data table whose ``upload_id`` equals the chosen
        upload's ``id``.

    Raises
    ------
    ValueError
        If ``upload_log`` contains no row for ``table_alias``.
    """
    cutoff_str = cutoff.isoformat()
    query = """
        SELECT id
        FROM upload_log
        WHERE table_alias = ?
        ORDER BY ABS(strftime('%s', uploaded_at) - strftime('%s', ?))
        LIMIT 1
    """
    row = conn.execute(query, (table_alias, cutoff_str)).fetchone()
    if row is None:
        raise ValueError(f"No uploads found for table alias '{table_alias}' near cutoff {cutoff_str}")
    upload_id = row[0]

    # A table name cannot be bound as a parameter, so it is quoted as an SQL
    # identifier (embedded double quotes doubled) instead of pasted in raw.
    # NOTE(review): a schema-qualified alias such as "main.tbl" is now treated
    # as one literal table name - confirm no caller relies on that form.
    quoted_table = '"' + table_alias.replace('"', '""') + '"'
    return pd.read_sql_query(
        f"SELECT * FROM {quoted_table} WHERE upload_id = ?",
        conn,
        params=(upload_id,),
    )


def build_payment_summary_tables(
        df: pd.DataFrame,
        current_year: int,
        report: str,
        db_path: str
) -> dict[str, pd.DataFrame]:
    """
    Build & store one payment-summary table per programme (HE, H2020).

    Rows are restricted to ``Budget Period == current_year`` and fund sources
    VOBU / EFTA, which are then reported under the single label "VOBU/EFTA".
    For each programme the payment columns are summed per budget-address type,
    a "Total" row is appended, and the table is stored through
    ``insert_variable`` as ``table_2a_<programme>_data``. Programmes without
    rows are skipped with a warning.

    Parameters
    ----------
    df
        Source rows; must contain "Budget Period", "Fund Source",
        "Functional Area Desc", "Budget Address", "Payment Appropriation",
        "Paid Amount" and "Payment Available".
    current_year
        Budget period to keep.
    report
        Report name forwarded to ``insert_variable``.
    db_path
        Database path forwarded to ``insert_variable``.

    Returns
    -------
    dict
        keys  = programme code ("HE", "H2020")
        value = aggregated DataFrame for that programme
    """
    # ---------- common pre-filtering ---------------------------------------------------
    # .copy() gives an independent frame, so the column assignments below are
    # not assignments on a slice (the source of SettingWithCopyWarning).
    df = df[(df["Budget Period"] == current_year) &
            (df["Fund Source"].isin(["VOBU", "EFTA"]))].copy()

    # Map Functional Area -> Programme
    df["Programme"] = df["Functional Area Desc"].replace({
        "HORIZONEU_21_27": "HE",
        "H2020_14_20": "H2020"
    })

    # Budget-type mapping helper
    def map_budget_type(val):
        """Translate a raw budget address into its display category."""
        if pd.isna(val):
            return None
        v = str(val).upper()
        if "EMPTY" in v:
            return "Main Call"
        if "EXPERTS" in v:
            return "Experts"
        return val

    df["Budget_Address_Type"] = df["Budget Address"].apply(map_budget_type)

    # Both fund sources are reported together under one label.
    df["Fund Source"] = 'VOBU/EFTA'

    # ---------- iterate over the two programmes ---------------------------------------
    results: dict[str, pd.DataFrame] = {}
    for programme in ("HE", "H2020"):
        df_p = df[df["Programme"] == programme]

        if df_p.empty:
            logging.warning("No rows for programme %s", programme)
            continue

        # -------- aggregation ------------------------------------------------------
        # NOTE(review): groupby's default dropna=True leaves out rows whose
        # Budget_Address_Type is missing - confirm that is intended.
        agg = (df_p
               .groupby(["Programme", "Fund Source", "Budget_Address_Type"],
                        as_index=False)[
                    ["Payment Appropriation", "Paid Amount",
                     "Payment Available"]]
               .sum())

        agg["%"] = agg["Paid Amount"] / agg["Payment Appropriation"]

        agg = agg.rename(columns={
            "Payment Appropriation": "Available_Payment_Appropriations",
            "Paid Amount": "Paid_Amount",
            "Payment Available": "Remaining_Payment_Appropriation",
            "%": "ratio_consumed_Payment_Appropriations"
        })

        # -------- Add total row ---------------------------------------------------
        total_row = pd.DataFrame({
            "Programme": ["Total"],
            "Fund Source": ['VOBU/EFTA'],
            "Budget_Address_Type": ['Total'],
            "Available_Payment_Appropriations": [agg["Available_Payment_Appropriations"].sum()],
            "Paid_Amount": [agg["Paid_Amount"].sum()],
            "Remaining_Payment_Appropriation": [agg["Remaining_Payment_Appropriation"].sum()],
            "ratio_consumed_Payment_Appropriations": [agg["Paid_Amount"].sum() / agg["Available_Payment_Appropriations"].sum()]
        })
        agg = pd.concat([agg, total_row], ignore_index=True)

        # -------- GreatTables object -------------------------------------------
        tbl = (
            GT(agg)
            # Each tab_stubhead call replaces the previous label, so only the
            # final one of the former three calls is kept.
            .tab_stubhead("Budget_Address_Type")

            .fmt_number(columns=[
                "Available_Payment_Appropriations",
                "Paid_Amount",
                "Remaining_Payment_Appropriation"
            ], accounting=True, decimals=2)

            .fmt_percent(
                columns="ratio_consumed_Payment_Appropriations",
                decimals=2)

            .cols_label(
                Available_Payment_Appropriations=html("Payment Appropriations<br>(1)"),
                Paid_Amount=html("Payment Credits consumed<br>(Acceptance Date)<br>(2)"),
                Remaining_Payment_Appropriation=html("Remaining Payment Appropriations<br>(3)=(1)-(2)"),
                ratio_consumed_Payment_Appropriations=html("% Payment Consumed<br>(4) = (2)/(1)")
            )

            .opt_table_font(font="Arial")

            .tab_style(
                style=[style.fill(color=BLUE),
                       style.text(color="white", weight="bold", align="center"),
                       style.css("max-width:200px; line-height:1.2")],
                locations=loc.column_labels())

            .tab_style(
                style=[style.css(f"border: 2px solid {BLUE};")],
                locations=loc.header()
                )

            .tab_style(
                style=[style.fill(color=BLUE),
                       style.text(color="white", weight="bold")],
                locations=loc.stubhead())

            .tab_style(
                style=style.borders(sides="all", color=DARK_BLUE, weight="2px"),
                locations=loc.body())

            .tab_style(
                style=style.borders(color=DARK_BLUE, weight="2px"),
                locations=[loc.column_labels(), loc.stubhead()])

            .tab_style(
                style=[style.fill(color="#E6E6FA"),  # Light purple for total row
                       style.text(color="black", weight="bold")],
                locations=loc.body(rows=agg.index[-1]))  # Apply to the last row (total)

            .tab_options(table_body_border_bottom_color=DARK_BLUE,
                         table_body_border_bottom_width="2px")

            .tab_options(table_border_top_color=DARK_BLUE,
                         table_border_top_width="2px")

            .tab_source_note("Source: Summa DataWarehouse")
            .tab_source_note("BO Report: C0_Budgetary_Execution_Details")
        )

        # -------- store --------------------------------------------------------
        insert_variable(
            report=report,
            module="BudgetModule",
            var=f"table_2a_{programme}_data",
            value=agg.to_dict(orient="records"),
            db_path=db_path,
            anchor=f"table_2a_{programme}",
            gt_table=tbl,
        )

        logging.debug("Stored 2a table for programme %s (%d rows)",
                      programme, len(agg))

        results[programme] = agg

    # ---- END loop ------

    return results
# Main execution
conn = None  # pre-bound so ``finally`` cannot raise NameError when connecting fails
try:
    conn = sqlite3.connect(db_path)
    df = fetch_latest_table_data(conn, "c0_budgetary_execution_details", cutoff)
    # build_payment_summary_tables returns a dict keyed by programme code;
    # tuple-unpacking it would only yield the keys (or raise when a single
    # programme has data), so the whole mapping is kept.
    payment_tables = build_payment_summary_tables(df, current_year, report, db_path)
except Exception as e:
    print("Error:", e)
    # logging.exception records the traceback together with the message.
    logging.exception("Main execution error: %s", e)
finally:
    if conn is not None:
        conn.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Programme"] = df["Functional Area Desc"].replace({
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Budget_Address_Type"] = df["Budget Address"].apply(map_budget_type)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Fund Source"] = 'VOBU/EFTA'
DEBUG:selenium.webdriver.common.selenium_manag