In [None]:
from pathlib import Path

import pandas as pd

from ttsim import main
from ttsim.tt_dag_elements import ScalarParam

# Directory `src/_gettsim`, resolved against the current working directory.
GETTSIM_ROOT = Path.cwd().joinpath("src", "_gettsim")

# Prototypes of GETTSIM's new interface

[GEP 7](https://gettsim--855.org.readthedocs.build/en/855/geps/gep-07.html) discusses
the principles of the new interface. This notebook demonstrates two candidates for
GETTSIM's new interface. We would like to get your feedback on which one you prefer.

In this notebook, we compute income taxes and social security contributions for example
data.

## Setup

This notebook requires GETTSIM to be installed in its current development version.

To do this:
1. Clone the GETTSIM repository.
2. Install the [pixi package manager](https://pixi.sh/latest/) on your system.
3. `cd` into the GETTSIM repository and run `git checkout inputs-for-main`.
4. Run `pixi run jupyter-notebook` and select the `interface-prototype.ipynb` notebook.

If you have trouble with the setup, please reach out.

## Creating the Data

First, we create some example data. Here, we use a pandas DataFrame with column names
that are different from the ones GETTSIM expects.

In [None]:
# Example input as a plain pandas DataFrame: one row per person, one column per
# variable. Rows 0 and 1 name each other as spouse; row 2 lists both of them as parents.
DATA = pd.DataFrame(
    data={
        # Person characteristics and identifiers
        "age": [30, 30, 10],
        "working_hours": [35, 35, 0],
        "disability_grade": [0, 0, 0],
        "birth_year": [1995, 1995, 2015],
        "hh_id": [0, 0, 0],
        "p_id": [0, 1, 2],
        "east_germany": [False, False, False],
        "self_employed": [False, False, False],
        # Income components
        "income_from_self_employment": [0, 0, 0],
        "income_from_rent": [0, 0, 0],
        "income_from_employment": [5000, 4000, 0],
        "income_from_forest_and_agriculture": [0, 0, 0],
        "income_from_capital": [500, 0, 0],
        "income_from_other_sources": [0, 0, 0],
        "pension_income": [0, 0, 0],
        # Tax-related expenses and filing status
        "contribution_to_private_pension_insurance": [0, 0, 0],
        "childcare_expenses": [0, 0, 0],
        # NOTE(review): -1 presumably means "no such person" in the id columns — confirm.
        "person_that_pays_childcare_expenses": [-1, -1, 0],
        "joint_taxation": [True, True, False],
        # Insurance-related information
        "amount_private_pension_income": [0, 0, 0],
        "contribution_private_health_insurance": [0, 0, 0],
        "has_children": [True, True, False],
        # Family structure (ids refer to values of `p_id`)
        "single_parent": [False, False, False],
        "is_child": [False, False, True],
        "spouse_id": [1, 0, -1],
        "parent_id_1": [-1, -1, 0],
        "parent_id_2": [-1, -1, 1],
        "in_training": [False, False, False],
        "id_recipient_child_allowance": [-1, -1, 0],
        # Transfers and benefits
        "wohngeld": [0, 0, 0],
        "kinderzuschlag": [0, 0, 0],
        "elterngeld": [0, 0, 0],
        "alg1": [0, 0, 0],
        "old_age_pension_income": [0, 0, 0],
        "bürgergeld": [0, 0, 0],
    }
)

The first step in GETTSIM's new workflow is to define the targets you're interested in.
The keys of the nested dictionary below are the paths GETTSIM will use as targets. For
instance, via the keys `einkommensteuer` and `betrag_y_sn`, we request the income tax as
a target.

The values on the lowest level of the dictionaries will be used as the column names of
the resulting DataFrame. Here, `income_tax_y` will be the name of the column containing
the income tax results.

In [None]:
# Nested keys spell out the path of each requested target; every leaf string is the
# name its column gets in the resulting DataFrame.
TARGETS_TREE = {
    "einkommensteuer": {
        "betrag_y_sn": "income_tax_y",
    },
    "sozialversicherung": {
        "pflege": {
            "beitrag": {
                "betrag_versicherter_m": "long_term_care_insurance_contribution_m",
            },
        },
        "kranken": {
            "beitrag": {
                "betrag_versicherter_m": "health_insurance_contribution_m",
            },
        },
        "rente": {
            "beitrag": {
                "betrag_versicherter_m": "pension_insurance_contribution_m",
            },
        },
        "arbeitslosen": {
            "beitrag": {
                "betrag_versicherter_m": "unemployment_insurance_contribution_m",
            },
        },
    },
}

Next, we define a mapping from GETTSIM's expected input structure to your data. As
above, we map the paths GETTSIM uses to the columns of your data. (We will provide
templates for this, so you won't have to type the paths manually.)

In [None]:
# Nested keys are the input paths GETTSIM uses; each leaf string names the column of
# the user's DataFrame that supplies that input.
TREE_TO_DF_MAPPER = {
    # Top-level inputs
    "alter": "age",
    "arbeitsstunden_w": "working_hours",
    "behinderungsgrad": "disability_grade",
    "geburtsjahr": "birth_year",
    "hh_id": "hh_id",
    "p_id": "p_id",
    "wohnort_ost": "east_germany",
    # Income tax inputs
    "einkommensteuer": {
        "einkünfte": {
            "ist_selbstständig": "self_employed",
            "aus_gewerbebetrieb": {
                "betrag_m": "income_from_self_employment",
            },
            "aus_vermietung_und_verpachtung": {
                "betrag_m": "income_from_rent",
            },
            "aus_nichtselbstständiger_arbeit": {
                "bruttolohn_m": "income_from_employment",
            },
            "aus_forst_und_landwirtschaft": {
                "betrag_m": "income_from_forest_and_agriculture",
            },
            # Reads the same column as `aus_gewerbebetrieb` above.
            "aus_selbstständiger_arbeit": {
                "betrag_m": "income_from_self_employment",
            },
            "aus_kapitalvermögen": {
                "kapitalerträge_m": "income_from_capital",
            },
            "sonstige": {
                "ohne_renten_m": "income_from_other_sources",
                "renteneinkünfte_m": "pension_income",
            },
        },
        "abzüge": {
            "beitrag_private_rentenversicherung_m": (
                "contribution_to_private_pension_insurance"
            ),
            "kinderbetreuungskosten_m": "childcare_expenses",
            "p_id_kinderbetreuungskostenträger": "person_that_pays_childcare_expenses",
        },
        "gemeinsam_veranlagt": "joint_taxation",
    },
    # Social insurance inputs
    "sozialversicherung": {
        "arbeitslosen": {"betrag_m": "alg1"},
        "rente": {
            "private_rente_betrag_m": "amount_private_pension_income",
            "altersrente": {"betrag_m": "old_age_pension_income"},
        },
        "kranken": {
            "beitrag": {
                "privat_versichert": "contribution_private_health_insurance",
            },
        },
        "pflege": {
            "beitrag": {
                "hat_kinder": "has_children",
            },
        },
    },
    # Family structure
    "familie": {
        "alleinerziehend": "single_parent",
        "kind": "is_child",
        "p_id_ehepartner": "spouse_id",
        "p_id_elternteil_1": "parent_id_1",
        "p_id_elternteil_2": "parent_id_2",
    },
    # Transfers passed in as inputs
    "wohngeld": {"betrag_m_wthh": "wohngeld"},
    "kinderzuschlag": {"betrag_m_bg": "kinderzuschlag"},
    "elterngeld": {"betrag_m": "elterngeld"},
    "arbeitslosengeld_2": {"betrag_m_bg": "bürgergeld"},
    "kindergeld": {
        "in_ausbildung": "in_training",
        "p_id_empfänger": "id_recipient_child_allowance",
    },
}

## Using GETTSIM's interface

Just as for taxes and transfers, GETTSIM's infrastructure is a DAG. GETTSIM's interface
is a function that allows you to interact with this DAG. This comes with the
advantages GETTSIM's users already know from the taxes and transfers part:
- Users can select any part of the DAG as a target. This means that users can access
  any intermediate objects.
- Users can feed any part of the DAG as input. This means that users can overwrite
  specific parts of the DAG (e.g. the policy environment).
- Users can decide which parts of the DAG not to compute. For example, users can choose
  not to perform safety checks on the input data. This means that GETTSIM is quicker in
  computing the result (at the expense of informative errors).

First, we look at the one-stop shop: computing the targets defined above using the input
data. In a second example, we manipulate the policy environment to see why the interface
DAG is useful.

### One-stop-shop: Computing taxes and transfers with GETTSIM

Now we can compute taxes and transfers. For this, we need to call the `main` function.
`main` takes two input arguments:
- `inputs`: a nested dictionary of the inputs you're passing to GETTSIM.
- `output_names`: a list of the outputs you want to get from GETTSIM.

`inputs` can be specified as a nested dictionary (see below) or as strings, separating
nesting levels with `__` (e.g. `"input_data__df_and_mapper__df"`).

Let's calculate taxes and transfers first:

In [None]:
# Request the `results__df_with_mapper` output of `main` and keep only that entry.
result = main(
    inputs={
        "date_str": "2025-01-01",
        "input_data": {"df_and_mapper": {"df": DATA, "mapper": TREE_TO_DF_MAPPER}},
        "targets": {"tree": TARGETS_TREE},
        # don't worry about this, will be gone in the future
        "orig_policy_objects": {"root": GETTSIM_ROOT},
    },
    output_names=["results__df_with_mapper"],
)["results__df_with_mapper"]
# Display the result transposed.
result.T

Input data can also be specified directly as a tree:

```python
result = main(
    inputs={
        "input_data": {
            "tree": INPUT_TREE,
        },
        ...
```

Or as a DataFrame with MultiIndex columns:

```python
result = main(
    inputs={
        "input_data": {
            "df_with_nested_columns": DF_WITH_NESTED_COLUMNS,
        },
        ...
```

### Manipulating the policy environment

First, we obtain the policy environment for the policy date we're interested in. Similar
to above, we have to call the `main` function.

In [None]:
# Ask `main` for the `policy_environment` output only; no input data is passed here.
policy_environment = main(
    inputs={
        "date_str": "2025-01-01",
        # don't worry about this, will be gone in the future
        "orig_policy_objects": {"root": GETTSIM_ROOT},
    },
    output_names=["policy_environment"],
)["policy_environment"]

Now, we slightly modify the policy environment. In this simple example, we increase the
contribution rate of the public pension insurance by 1 percentage point.

The contribution rate is a `ScalarParam` object:

In [None]:
# Display the class of the object stored at the `beitragssatz` path.
type(policy_environment["sozialversicherung"]["rente"]["beitrag"]["beitragssatz"])

We replace the `ScalarParam` object with a new one. Then, we add this parameter to the
new policy environment.

In [None]:
# Parameter currently stored at the `beitragssatz` path.
old_beitragssatz = policy_environment["sozialversicherung"]["rente"]["beitrag"][
    "beitragssatz"
]

# Replacement parameter: every field is taken over from the old one, except `value`,
# which is raised by 0.01. (Don't worry too much about this, will get easier.)
new_beitragssatz = ScalarParam(
    value=old_beitragssatz.value + 0.01,
    **{
        field: getattr(old_beitragssatz, field)
        for field in (
            "leaf_name",
            "start_date",
            "end_date",
            "unit",
            "description",
            "name",
            "reference_period",
        )
    },
)

# NOTE(review): `.copy()` is presumably a shallow copy; if so, the nested assignment
# below also changes `policy_environment` itself — confirm whether that is intended.
modified_policy_environment = policy_environment.copy()
modified_policy_environment["sozialversicherung"]["rente"]["beitrag"][
    "beitragssatz"
] = new_beitragssatz

Now we can compute taxes and transfers using the modified policy environment.

In [None]:
# Recompute the same targets for the same data, this time supplying a policy
# environment directly instead of `orig_policy_objects`.
result = main(
    inputs={
        "date_str": "2025-01-01",
        "input_data": {
            "df_and_mapper": {
                "df": DATA,
                "mapper": TREE_TO_DF_MAPPER,
            },
        },
        "targets": {
            "tree": TARGETS_TREE,
        },
        # Pass the environment that carries the new `beitragssatz`. The original
        # `policy_environment` must not be relied on to contain the change.
        "policy_environment": modified_policy_environment,
    },
    output_names=["results__df_with_mapper"],
)["results__df_with_mapper"]
# Display the result transposed.
result.T