## Dump all transfers in per-infrastructure files

The transfers are formatted according to the data-collection-schema format.

In [None]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async

# Bootstrap Django so that the project's models can be imported below.
# The statements are order-dependent: the import path and the settings module
# must both be in place before django.setup() runs.
# The project root is taken to be the parent of the current working directory
# (assumes the notebook is started from a direct sub-folder of the project
# -- TODO confirm).
BASE_DIR = Path(os.getcwd()).resolve().parent

# Guarded so that re-running the cell does not append the same entry twice.
if str(BASE_DIR) not in sys.path:
    sys.path.append(str(BASE_DIR))

# setdefault leaves an already exported DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

django.setup()

from tsosi.models import Transfer, Entity, Identifier
from tsosi.models.date import Date
import pandas as pd


def _load_frame(queryset, fields: list[str]) -> pd.DataFrame:
    """Read ``fields`` of every row of ``queryset`` into a DataFrame."""
    records = list(queryset.values(*fields))
    # Explicit columns keep the expected layout even when the table is empty,
    # so later column lookups do not fail with a KeyError.
    return pd.DataFrame.from_records(records, columns=fields)


def _identifier_lookup(df_ids: pd.DataFrame, registry_id: str) -> pd.Series:
    """Return a Series mapping ``entity_id`` to its value in one registry."""
    rows = df_ids[df_ids["registry_id"] == registry_id]
    # Series.map() requires a uniquely indexed mapper; an entity holding
    # several identifiers in the same registry would otherwise raise.
    # The first row returned by the query is the one kept.
    rows = rows.drop_duplicates(subset="entity_id", keep="first")
    return rows.set_index("entity_id")["value"]


def _schema_column(column: str, prefixes: dict[str, str]) -> str:
    """Swap a leading role prefix of ``column`` for its schema prefix."""
    for old, new in prefixes.items():
        # Only a *leading* match is rewritten, so names that merely contain
        # a role word elsewhere are left untouched.
        if column.startswith(old):
            return new + column.removeprefix(old)
    return column


@sync_to_async
def to_run():
    """Build the export table holding every transfer.

    Transfers are joined with their emitter, recipient and agent entities
    (name, country, website, ROR and Wikidata identifiers), date fields are
    rendered through ``Date(**value).format()``, and the columns are renamed
    to the ``institution/``, ``infrastructure/``, ``intermediary/`` and
    ``contract/`` names used for the export.

    Returns:
        pd.DataFrame: the export columns only, sorted by ``date_clc`` in
        descending order.
    """
    # NOTE(review): "description" is fetched but never exported, while
    # "contract/description" is always left empty -- confirm this is intended.
    df = _load_frame(
        Transfer.objects.all(),
        [
            "emitter_id",
            "recipient_id",
            "agent_id",
            "amount",
            "currency_id",
            "hide_amount",
            "date_clc",
            "date_invoice",
            "date_payment_recipient",
            "date_payment_emitter",
            "date_start",
            "date_end",
            "description",
            "scoss",
        ],
    )
    df_e = _load_frame(
        Entity.objects.all(), ["id", "name", "country", "website"]
    )
    df_ids = _load_frame(
        Identifier.objects.all(), ["registry_id", "value", "entity_id"]
    )

    # Attach the external identifiers to each entity.
    df_e["ror_id"] = df_e["id"].map(_identifier_lookup(df_ids, "ror"))
    df_e["wikidata_id"] = df_e["id"].map(
        _identifier_lookup(df_ids, "wikidata")
    )

    # One left join per role; the entity columns get the role as prefix.
    for role in ("emitter", "recipient", "agent"):
        df = df.merge(
            df_e.add_prefix(f"{role}_"), on=f"{role}_id", how="left"
        )

    date_cols = [
        "date_clc",
        "date_invoice",
        "date_payment_recipient",
        "date_payment_emitter",
        "date_start",
        "date_end",
    ]
    for col in date_cols:
        # Missing dates are kept as-is; other values are expanded as keyword
        # arguments of Date (assumes they are mappings -- TODO confirm).
        df[col] = df[col].apply(
            lambda x: Date(**x).format() if not pd.isna(x) else x
        )

    role_prefixes = {
        "emitter_": "institution/",
        "agent_": "intermediary/",
        "recipient_": "infrastructure/",
    }
    df = df.rename(columns=lambda c: _schema_column(c, role_prefixes))

    # Columns expected in the export for which no value is filled in here.
    for c in ("contract/id", "contract/description"):
        df[c] = None

    df = df.rename(
        columns={
            "date_payment_emitter": "date_emitted",
            "date_payment_recipient": "date_received",
            "date_start": "contract/date_start",
            "date_end": "contract/date_end",
            "currency_id": "currency",
        }
    )

    cols_for_export = [
        "infrastructure/name",
        "institution/name",
        "institution/ror_id",
        "institution/wikidata_id",
        "institution/country",
        "institution/website",
        "intermediary/name",
        "intermediary/ror_id",
        "intermediary/wikidata_id",
        "intermediary/country",
        "intermediary/website",
        "amount",
        "currency",
        "hide_amount",
        "date_clc",
        "date_emitted",
        "date_received",
        "date_invoice",
        "scoss",
        "contract/id",
        "contract/description",
        "contract/date_start",
        "contract/date_end",
    ]

    return df[cols_for_export].sort_values("date_clc", ascending=False)


# to_run is wrapped with sync_to_async, so calling it returns an awaitable.
# A top-level await is only valid where cells are executed inside an event
# loop (as Jupyter/IPython does); it is a syntax error in a plain script.
df = await to_run()

In [2]:
from datetime import date

# Write one Excel workbook per infrastructure, named
# <YYYY-MM-DD>_TSOSI_<infrastructure name>_transfers.xlsx, where spaces in the
# infrastructure name are replaced by underscores.
infra_col = "infrastructure/name"
infras: list[str] = df[infra_col].drop_duplicates().to_list()

date_str = date.today().strftime("%Y-%m-%d")
folder = BASE_DIR / "_no_git/fixtures/"
# DataFrame.to_excel does not create missing directories.
folder.mkdir(parents=True, exist_ok=True)


for infra in infras:
    # The infrastructure name is already carried by the file name, so the
    # column is dropped from the exported sheet.
    extract = df[df[infra_col] == infra].drop(columns=infra_col)
    # Built outside the f-string: reusing the enclosing quote character inside
    # a replacement field is a SyntaxError before Python 3.12.
    infra_slug = infra.replace(" ", "_")
    file_name = f"{date_str}_TSOSI_{infra_slug}_transfers.xlsx"
    extract.to_excel(str(folder / file_name), index=False)