### Fill DB with partners data


In [None]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async

# Bootstrap Django for this cell. The project root is taken to be two levels
# above the current working directory; it is put on sys.path so the settings
# module named below can be imported.
BASE_DIR = str(Path.cwd().resolve().parent.parent)

# Re-running the cell must not stack duplicate sys.path entries.
if sys.path.count(BASE_DIR) == 0:
    sys.path.append(BASE_DIR)

# Keep any DJANGO_SETTINGS_MODULE already present in the environment.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "backend_site.settings"

django.setup()

from tsosi.data.data_preparation import prepare_data, get_input_config
from tsosi.data.ingestion import ingest_new_records
from tsosi.models import empty_db

EMPTY_DB = True


@sync_to_async
def to_run(years=(2021,)):
    """Optionally empty the DB, then prepare and ingest the selected DOAJ reports.

    When the module-level ``EMPTY_DB`` flag is truthy, ``empty_db()`` is called
    first. An input config is built for every available report year
    (2021-2024); only the configs whose year is listed in ``years`` are passed
    to ``prepare_data`` and have their ``processed_data`` ingested.

    Args:
        years: Report years to ingest, in processing order. Defaults to 2021
            only. Pass e.g. ``(2021, 2022)`` to ingest more reports.

    Returns:
        The last config that was processed, or ``None`` when ``years`` is empty.

    Raises:
        KeyError: If ``years`` contains a year with no known report.
    """
    # Fill DB
    if EMPTY_DB:
        empty_db()

    # One config per yearly report; names and file names differ only by year.
    available = {
        year: get_input_config(
            f"doaj_publisher_{year}",
            f"2025-01-07-DOAJ_Publisher_Report_{year}_prepared.xlsx",
            sheet_name="Sheet1",
        )
        for year in (2021, 2022, 2023, 2024)
    }
    configs = [available[year] for year in years]

    # Track the last processed config explicitly rather than relying on the
    # loop variable, which would be unbound for an empty selection.
    last_config = None
    for config in configs:
        prepare_data(config)
        ingest_new_records(config.processed_data)
        last_config = config

    return last_config


res = await to_run()

In [None]:
res

### Enrich existing entities with ROR data


In [None]:
from pathlib import Path
import sys
import os
import pandas as pd
import django
from asgiref.sync import sync_to_async
from django.utils import timezone

# Bootstrap Django for this cell. The project root is taken to be two levels
# above the current working directory; it is put on sys.path so the settings
# module named below can be imported.
BASE_DIR = str(Path.cwd().resolve().parent.parent)

# Re-running the cell must not stack duplicate sys.path entries.
if sys.path.count(BASE_DIR) == 0:
    sys.path.append(BASE_DIR)

# Keep any DJANGO_SETTINGS_MODULE already present in the environment.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "backend_site.settings"

django.setup()

from tsosi.data.pid_matching import (
    entities_with_no_ror,
    match_entities_to_pid,
)
from tsosi.models import Identifier, Entity, Transfert
from tsosi.data.ingestion import ingest_partners_data


# Hand-written sample of two merge rows: both rows share the same entity pair
# and differ only in "merged_criteria".
to_merge = pd.DataFrame(
    {
        "entity_id": ["028b2cfd-bea6-49bf-8576-5824d808f290"] * 2,
        "merged_with_id": ["03a89952-4bbf-43dc-b8dc-080695a19cda"] * 2,
        "merged_criteria": ["AHAH", "BHBH"],
        "match_source": ["automatic"] * 2,
        "match_criteria": ["merged"] * 2,
    }
)
# reset_index() turns the default 0..n-1 index into a leading "index" column.
to_merge = to_merge.reset_index()


now = timezone.now()


@sync_to_async
def to_run():
    """Feed the result of ``entities_with_no_ror()`` to the PID matcher.

    The matcher is called with ``export_to_verify=False`` and ``limit=20``.
    Nothing is returned.
    """
    match_entities_to_pid(
        entities_with_no_ror(),
        export_to_verify=False,
        limit=20,
    )


res = await to_run()

### Fetch PID records


In [None]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async
from django.utils import timezone

# Bootstrap Django for this cell. The project root is taken to be two levels
# above the current working directory; it is put on sys.path so the settings
# module named below can be imported.
BASE_DIR = str(Path.cwd().resolve().parent.parent)

# Re-running the cell must not stack duplicate sys.path entries.
if sys.path.count(BASE_DIR) == 0:
    sys.path.append(BASE_DIR)

# Keep any DJANGO_SETTINGS_MODULE already present in the environment.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "backend_site.settings"

django.setup()

from tsosi.data.enrichment import entities_with_identifier_data

now = timezone.now()


@sync_to_async
def to_run():
    """Return ``entities_with_identifier_data(now)``.

    ``now`` is the module-level timestamp captured when the cell was run.
    """
    # Optional reset before fetching (left disabled):
    # IdentifierVersion.objects.all().delete()
    return entities_with_identifier_data(now)


res = await to_run()

In [None]:
res.loc[0, "record"]

### Analyze & ingest PID record data


In [None]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async
from django.utils import timezone

# Bootstrap Django for this cell. The project root is taken to be two levels
# above the current working directory; it is put on sys.path so the settings
# module named below can be imported.
BASE_DIR = str(Path.cwd().resolve().parent.parent)

# Re-running the cell must not stack duplicate sys.path entries.
if sys.path.count(BASE_DIR) == 0:
    sys.path.append(BASE_DIR)

# Keep any DJANGO_SETTINGS_MODULE already present in the environment.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "backend_site.settings"

django.setup()

from tsosi.models import Entity, IdentifierVersion
from tsosi.data.enrichment import (
    update_entity_from_pid_records,
    update_wikipedia_extract,
    new_identifiers_from_records,
    update_logos,
    update_transfert_date_clc,
    update_entity_roles_clc,
)

now = timezone.now()

IDS = ["Q1227538", "Q945876"]


@sync_to_async
def to_run():
    """Run the currently enabled enrichment step and return its result.

    Only ``new_identifiers_from_records()`` is enabled. The other resets and
    enrichment steps are kept as commented-out toggles to enable by hand.

    Returns:
        Whatever ``new_identifiers_from_records()`` returns. The following
        cell inspects ``res[0].loc[10]``, so the result must be propagated
        instead of dropped.
        NOTE(review): the return shape of that helper is not visible here;
        confirm it matches that inspection.
    """
    # IdentifierVersion.objects.all().delete()
    # Entity.objects.all().update(logo=None, date_logo_fetched=None)
    # update_entity_from_pid_records()
    result = new_identifiers_from_records()
    # update_wikipedia_extract()
    # update_logos()
    # update_transfert_date_clc()
    # update_entity_roles_clc()
    return result


res = await to_run()

In [None]:
res[0].loc[10]

### Fetch currency rates and convert amounts


In [None]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async
from django.utils import timezone

# Bootstrap Django for this cell. The project root is taken to be two levels
# above the current working directory; it is put on sys.path so the settings
# module named below can be imported.
BASE_DIR = str(Path.cwd().resolve().parent.parent)

# Re-running the cell must not stack duplicate sys.path entries.
if sys.path.count(BASE_DIR) == 0:
    sys.path.append(BASE_DIR)

# Keep any DJANGO_SETTINGS_MODULE already present in the environment.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "backend_site.settings"

django.setup()

from tsosi.data.currencies.currency_rates import currency_rates_workflow

now = timezone.now()

IDS = ["Q1227538", "Q945876"]


@sync_to_async
def to_run():
    """Call ``currency_rates_workflow()``; its result is discarded."""
    currency_rates_workflow()
    return None


res = await to_run()

In [None]:
import pandas as pd

# Scratch example: pick, per row, the first available name among several
# candidate columns.
df = pd.DataFrame(
    data=[
        ["First_1", "First_2", "First_3"],
        [None, "Second_2", "Second_3"],
        [None, None, "Third_3"],
        ["Fourth_1", None, None],
        [None, "Fifth_2", None],
    ],
    columns=["name_1", "name_2", "name_3"],
)
# bfill(axis=1) fills each missing cell from the next non-missing cell to its
# right. With the columns reordered, the first column ("name_2") ends up
# holding the first available value in priority name_2 > name_1 > name_3.
res = df[["name_2", "name_1", "name_3"]].bfill(axis=1)
res