In [None]:
import logging

logging.basicConfig(level=logging.WARNING, force=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Resolve WORDLIFT_KEY from up to two optional sources. Start from "not set"
# so that, when neither source can be imported, the final check raises the
# intended ValueError rather than a NameError on an unbound name.
WORDLIFT_KEY = None

# Source 1: the local `config/default.py` file.
try:
    from config import default as config
except ImportError:
    logging.warning("Cannot import configuration from local `config/default.py` file.")
else:
    WORDLIFT_KEY = config.WORDLIFT_KEY

# Source 2: Google Colab Secrets. A value found here takes precedence over the
# local file, but a missing (None) secret does not wipe out a key that was
# already loaded from the local file.
try:
    from google.colab import userdata
except ImportError:
    logging.warning("Cannot import configuration from google.colab.userdata.")
else:
    colab_key = userdata.get('WORDLIFT_KEY')
    if colab_key is not None:
        WORDLIFT_KEY = colab_key

# Treat an empty key the same as a missing one: fail here, early and clearly.
if not WORDLIFT_KEY:
    raise ValueError('Configuration not set')

In [None]:
import sys

if "google.colab" in sys.modules:
    !pip install \
    "tqdm>=4.67.1,<5.0.0" \
    "wordlift-sdk @ git+https://github.com/wordlift/python-sdk.git"

In [None]:
from tqdm.asyncio import tqdm
from wordlift_sdk.client import ClientConfigurationFactory
from wordlift_sdk.utils import delayed, create_dataframe_of_entities_with_embedding_vectors
from wordlift_sdk.internal_link import create_internal_link_handler

# Build the client configuration from the API key only.
# Defining the host is optional and defaults to https://api.wordlift.io
# See configuration.py for a list of all supported configuration parameters.
# NOTE(review): `api_url` is assigned below but is not passed to the factory
# or used in any visible cell — confirm whether it should be forwarded to the
# factory or can be removed.
api_url = 'https://api.wordlift.io'
configuration = ClientConfigurationFactory(key=WORDLIFT_KEY).create()

In [None]:
async def main() -> None:
    entities_with_embedding_vectors_df = await create_dataframe_of_entities_with_embedding_vectors(WORDLIFT_KEY)

    # Enrich the Graph, notice that here we pass our callback `parse_html` which will return Patch requests, no need to deal with the actual API. We're polite and not making more than 2 concurrent reqs.
    handler = create_internal_link_handler(configuration, 'getting_started')
    await tqdm.gather(
        *[delayed(handler, 2)(row) for index, row in entities_with_embedding_vectors_df.iterrows()],
        total=len(entities_with_embedding_vectors_df)
    )

    # Print the ID of the entities processed
    for index, row in entities_with_embedding_vectors_df.iterrows():
        logger.info(row['url'] + " " + row['iri'])


await main()
