### 0 - Get SciPost data


In [None]:
from get_data import get_scipost_raw_data, enrich_scipost_raw_data
from datetime import date
from pathlib import Path

# Build the destination path for today's raw SciPost dump:
# <home>/Nextcloud/TSOSI_data/scipost/0_raw/<YYYY-MM-DD>_scipost_raw.json
# The date prefix is the day on which this cell is executed.
today_str = f"{date.today():%Y-%m-%d}"
raw_data_folder = Path.home().joinpath("Nextcloud/TSOSI_data/scipost/0_raw")
raw_data_file = today_str + "_scipost_raw.json"

# The destination is handed over as a plain string rather than a Path.
raw_path = str(raw_data_folder.joinpath(raw_data_file))
get_scipost_raw_data(dest_file=raw_path)

### 1 - Pre-process data


In [None]:
from get_data import pre_process_data
from pathlib import Path
import pandas as pd

# Date prefix shared by the raw input file and the pre-processed output file.
date_str = "2025-03-05"

# Input: raw dump for that date.
raw_folder = Path.home().joinpath("Nextcloud/TSOSI_data/scipost/0_raw")
raw_file = date_str + "_scipost_raw.json"
raw_path = str(raw_folder.joinpath(raw_file))

# Output: pre-processed records for the same date.
processed_folder = Path.home().joinpath(
    "Nextcloud/TSOSI_data/scipost/1_pre_processed"
)
processed_file = date_str + "_scipost_pre_processed.json"
processed_path = str(processed_folder.joinpath(processed_file))

# Load the raw records, run the pre-processing step, and write the result
# back out as an indented, records-oriented JSON file.
df = pd.read_json(raw_path, orient="records")
data = pre_process_data(df)
data.to_json(processed_path, orient="records", indent=2, index=False)

### 3 - Enrich data with ROR ID


In [None]:
from pathlib import Path
from get_data import enrich_scipost_raw_data

# Date prefix shared by the pre-processed input and the enriched output.
date_str = "2025-03-05"

# Input: output of the pre-processing step.
input_data_folder = Path.home().joinpath(
    "Nextcloud/TSOSI_data/scipost/1_pre_processed"
)
input_data_file = date_str + "_scipost_pre_processed.json"
input_path = str(input_data_folder.joinpath(input_data_file))

# Output: enriched records for the same date.
enriched_data_folder = Path.home().joinpath(
    "Nextcloud/TSOSI_data/scipost/3_enriched"
)
enriched_data_file = date_str + "_scipost_enriched.json"
enriched_path = str(enriched_data_folder.joinpath(enriched_data_file))

# Both paths are passed as plain strings: input first, destination second.
enrich_scipost_raw_data(input_path, enriched_path)

### 5 - Generate TSOSI data file


In [None]:
from pathlib import Path
import sys
import os
import django
from datetime import date

# Make the Django project importable and initialise Django.
# The project root is taken to be four directory levels above the current
# working directory, so this cell depends on where the notebook is run from.
BASE_DIR = str(Path(os.getcwd()).resolve().parent.parent.parent.parent)

# Avoid stacking duplicate entries when the cell is re-executed.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# setdefault keeps any DJANGO_SETTINGS_MODULE already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

django.setup()

# Imported only after django.setup().
# NOTE(review): presumably the tsosi modules need a configured Django - confirm.
from tsosi.data.preparation.scipost.default import get_config

# Single source of truth for the snapshot date: the enriched file name is
# derived from it (isoformat() yields YYYY-MM-DD), so the date passed to
# get_config and the file that is read can no longer drift apart.
date_data = date(2025, 3, 5)
file_path = (
    Path.home()
    / "Nextcloud/TSOSI_data/scipost/3_enriched"
    / f"{date_data.isoformat()}_scipost_enriched.json"
)
config = get_config(str(file_path), date_data)
config.generate_data_file()