In [5]:
from pathlib import Path
import sys
import os
import django
from datetime import date
import pandas as pd
from asgiref.sync import sync_to_async

# Make the project importable from this notebook and initialise Django.
# BASE_DIR is the directory two levels above the current working directory
# (assumes the kernel's cwd is the notebook's own folder — TODO confirm).
BASE_DIR = str(Path(os.getcwd()).resolve().parent.parent)

# Guarded so that re-running the cell does not append a duplicate entry.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

# The project (`tsosi`) imports are placed after this call on purpose.
django.setup()

from django.db.models import F
from tsosi.models import Transfert, Entity
from tsosi.data.entity_matching import matchable_entities, match_entities
from tsosi.models.transfert import (
    MATCH_CRITERIA_SAME_NAME_COUNTRY,
    MATCH_CRITERIA_SAME_NAME_ONLY,
    MATCH_CRITERIA_SAME_PID,
    MATCH_CRITERIA_SAME_NAME_URL,
)


@sync_to_async
def to_run():
    return matchable_entities()


res = await to_run()

### Prepare raw data for manual matching


In [10]:
from pathlib import Path
import sys
import os
import django
import pandas as pd
from asgiref.sync import sync_to_async

# Make the project importable from this notebook and initialise Django.
# BASE_DIR is the directory two levels above the current working directory
# (assumes the kernel's cwd is the notebook's own folder — TODO confirm).
BASE_DIR = str(Path(os.getcwd()).resolve().parent.parent)

# Guarded so that re-running the cell does not append a duplicate entry.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

# The project (`tsosi`) import is placed after this call on purpose.
django.setup()

from tsosi.data.pid_matching import prepare_manual_matching


# Workbook to read for the manual-matching preparation.
file_name = "2024-09-20-DOAJ_Library_Report_2021-2023_raw.xlsx"
# NOTE(review): absolute path on one developer's machine; the cell only runs
# where this directory exists. Keep the trailing slash: folder and file name
# are joined without a separator when the workbook is opened.
file_folder = "/home/guillaume-alzieu/Dev/TSOSI/TSOSI_data/doaj/1_raw/"
# Worksheet to load from the workbook.
sheet = "2021"
# Column name handed to prepare_manual_matching.
name_column = "Institution name"


@sync_to_async
def to_run():
    xls = pd.ExcelFile(f"{file_folder}{file_name}")
    data = pd.read_excel(xls, sheet)
    return prepare_manual_matching(data, name_column)


res = await to_run()

In [None]:
# Bare expression: the notebook renders the current value of `res` as output.
res

In [12]:
# Destination for the matched data. Note that `file_folder` is rebound here
# from the input folder to the output folder.
file_folder = "/home/guillaume-alzieu/Dev/TSOSI/TSOSI_data/doaj/2_matched/"
file_to_write = "2024-09-20-DOAJ_Library_Report_2021_matched.xlsx"
# The folder string ends with "/", so plain concatenation yields the full path.
file_path = file_folder + file_to_write
# index=False: do not write the index as an extra column.
res.to_excel(file_path, index=False)

### Prepare enriched data


In [8]:
from pathlib import Path
import sys
import os
import django
import pandas as pd
from asgiref.sync import sync_to_async

# Make the project importable from this notebook and initialise Django.
# BASE_DIR is the directory two levels above the current working directory
# (assumes the kernel's cwd is the notebook's own folder — TODO confirm).
BASE_DIR = str(Path(os.getcwd()).resolve().parent.parent)

# Guarded so that re-running the cell does not append a duplicate entry.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

# The project (`tsosi`) imports are placed after this call on purpose.
django.setup()

from tsosi.data.pid_matching import process_enriched_data
from tsosi.models.transfert import TRANSFERT_ENTITY_TYPE_EMITTER

# Enriched workbook to process.
# NOTE(review): absolute, machine-specific paths; this cell only runs where
# these directories exist.
file = "/home/guillaume-alzieu/Dev/TSOSI/TSOSI_data/doaj/3_enriched/TSOI_publisher_support_2024_enriched_done.xlsx"
sheet = "Sheet1"
name_column = "Company"
# Open the workbook in a context manager so its file handle is closed as soon
# as the sheet has been loaded into memory.
with pd.ExcelFile(file) as xls:
    data = pd.read_excel(xls, sheet)

# Process the loaded sheet with the emitter entity type.
res = process_enriched_data(data, name_column, TRANSFERT_ENTITY_TYPE_EMITTER)

# Write the processed result; index=False keeps the index out of the sheet.
file_to_write = "/home/guillaume-alzieu/Dev/TSOSI/TSOSI_data/doaj/4_prepared/TSOI_publisher_support_2024_prepared.xlsx"
res.to_excel(file_to_write, index=False)

In [None]:
import pycountry

# Build a lookup of every record in `pycountry.countries`, keyed by the
# record's `name` attribute; as the last expression it is shown as cell output.
{country.name: country for country in pycountry.countries}

### Redis setup


In [3]:
from pathlib import Path
import sys
import os
import django
from asgiref.sync import sync_to_async

# Make the project importable from this notebook and initialise Django.
# BASE_DIR is the directory two levels above the current working directory
# (assumes the kernel's cwd is the notebook's own folder — TODO confirm).
BASE_DIR = str(Path(os.getcwd()).resolve().parent.parent)

# Guarded so that re-running the cell does not append a duplicate entry.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend_site.settings")

# The project (`tsosi`) imports are placed after this call on purpose.
django.setup()

# from tsosi.data.token_bucket import ror_token_bucket as bucket
from tsosi.data.token_bucket import WIKIDATA_TOKEN_BUCKET as bucket
from tsosi.data.token_bucket import TOKEN_LUA_SCRIPT, TOKEN_SCRIPT_SHA_KEY


async def to_run():
    """Refill `bucket`, then request 10000 tokens from it.

    Both bucket calls are made synchronously; nothing is awaited inside this
    coroutine.

    Returns:
        Whatever `bucket.consume(10000)` returns.
    """
    # Earlier scratch experiments went through `bucket.redis` directly
    # (decrby/get on the token-count and last-refill keys, and loading
    # TOKEN_LUA_SCRIPT, storing its SHA under TOKEN_SCRIPT_SHA_KEY and calling
    # evalsha with it). Only the two bucket-level calls remain active.
    bucket.refill()
    return bucket.consume(10000)


# Top-level await: handled by the IPython kernel, not valid in a plain script.
res = await to_run()

INFO token_bucket - Consumed 0 tokens from bucket wikidata.


In [2]:
import time

# Bare expression: the notebook renders the current value of `res` as output.
res
# Leftover scratch code: presumably from a run where `res` held a bytes-encoded
# timestamp, decoded to float and compared with the current time — confirm
# before re-enabling.
# n_res = float(res.decode("utf-8"))
# print(time.time() - n_res)

0