In [None]:
import httpx
from httpx import ConnectError


def clean(input_string):
    """Normalise a string: trim surrounding whitespace, lowercase it, drop commas."""
    trimmed = input_string.strip()
    lowered = trimmed.lower()
    return lowered.replace(",", "")


def clean_csv(input_string):
    """Split a ", "-separated value into a list of cleaned items.

    The input is coerced with ``str`` first and split on ``", "``. Empty
    pieces are skipped; every other piece is passed through ``clean``.
    """
    cleaned = []
    for piece in str(input_string).split(", "):
        if piece == "":
            continue
        cleaned.append(clean(piece))
    return cleaned


def get_wikidata_id(concept_name):
    """Search Wikidata for ``concept_name`` and return the ID of the top hit.

    Sends an English ``wbsearchentities`` query to the Wikidata API and
    returns the ``id`` of the first entry under ``search`` in the JSON reply.
    ``IndexError`` / ``KeyError`` propagate to the caller when the reply has
    no such entry.
    """
    search_params = {
        "action": "wbsearchentities",
        "language": "en",
        "format": "json",
        "search": concept_name,
    }
    reply = httpx.get("https://www.wikidata.org/w/api.php", params=search_params)
    results = reply.json()["search"]

    # No ranking is attempted: the first hit is taken as-is.
    return results[0]["id"]


def get_wikidata(wikidata_id):
    """Fetch the entity record for ``wikidata_id`` from Wikidata.

    Args:
        wikidata_id: Wikidata entity ID, e.g. ``"Q1420"``.

    Returns:
        The value stored under ``["entities"][wikidata_id]`` in the JSON
        document served by ``Special:EntityData``.

    Raises:
        KeyError: if the reply has no ``entities`` entry for ``wikidata_id``.
    """
    # Request the HTTPS endpoint directly. The plain-HTTP address is answered
    # with a redirect, and httpx does not follow redirects by default, so
    # ``.json()`` would be called on the redirect response instead of the
    # entity document.
    response = httpx.get(
        f"https://www.wikidata.org/wiki/Special:EntityData/{wikidata_id}.json"
    ).json()

    return response["entities"][wikidata_id]


def get_variant_names(
    concept_name, languages=("en", "en-gb", "en-ca", "en-us", "en-simple")
):
    """Collect the labels and aliases Wikidata lists for a concept.

    The concept is resolved with ``get_wikidata_id`` and its record fetched
    with ``get_wikidata``. Labels and aliases whose ``language`` is in
    ``languages`` are passed through ``clean`` and de-duplicated.

    Args:
        concept_name: Free-text name to search for.
        languages: Language codes to keep. The default is an immutable tuple
            so the shared default cannot be modified between calls.

    Returns:
        A sorted list of unique cleaned names, or ``[]`` when the lookup
        fails with ``IndexError``, ``KeyError`` or ``ConnectError``.
    """
    try:
        wikidata_id = get_wikidata_id(concept_name)
        data = get_wikidata(wikidata_id)
        labels = [
            label["value"]
            for label in data["labels"].values()
            if label["language"] in languages
        ]
        aliases = [
            alias["value"]
            for group in data["aliases"].values()
            for alias in group
            if alias["language"] in languages
        ]
        unique_names = {clean(name) for name in labels + aliases}

    except (IndexError, KeyError, ConnectError):
        return []

    # Set iteration order is arbitrary; sorting makes the output reproducible
    # from one run to the next.
    return sorted(unique_names)


def get_description(concept_name):
    """Return the English Wikidata description for a concept.

    The concept is resolved with ``get_wikidata_id`` and its record fetched
    with ``get_wikidata``, so the entity request lives in one place instead
    of being repeated here.

    Args:
        concept_name: Free-text name to search for.

    Returns:
        The value of ``descriptions.en`` on the entity, or ``""`` when the
        lookup fails with ``IndexError``, ``KeyError`` or ``ConnectError``.
    """
    try:
        wikidata_id = get_wikidata_id(concept_name)
        data = get_wikidata(wikidata_id)
        description = data["descriptions"]["en"]["value"]

    except (IndexError, KeyError, ConnectError):
        description = ""

    return description

In [None]:
# Fetch one example entity to explore; later cells read its claims from `wikidata`.
wikidata_id = "Q1420"
wikidata = get_wikidata(wikidata_id)

# LCSH (Library of Congress Subject Headings)

In [None]:
# Take the value of the first P244 claim on the entity (used below as the
# Library of Congress subject ID). Falls back to None when the claim, or any
# key along the path, is missing.
try:
    lcsh_id = wikidata["claims"]["P244"][0]["mainsnak"]["datavalue"]["value"]
except (KeyError, IndexError):
    lcsh_id = None

In [None]:
# Show the extracted ID (None when no P244 value could be read).
lcsh_id

In [None]:
# Fetch the Library of Congress record for `lcsh_id` and keep the node of the
# response that describes the subject itself.
if lcsh_id is None:
    # Without this guard the request below would be made for ".../None.json".
    raise ValueError("no library of congress ID was found for this entity")

# Identifier of the subject; the JSON document is served at this address + ".json".
lcsh_uri = f"http://id.loc.gov/authorities/subjects/{lcsh_id}"
url = f"{lcsh_uri}.json"

response = httpx.get(url)
if response.status_code == 404:
    raise ValueError(f"{lcsh_id} is not a valid library of congress ID")
if response.status_code != 200:
    raise ValueError(f"something unexpected happened when calling url: {url}")

# The payload is a list of nodes; pick the one whose "@id" is the subject URI.
# Defaulting to None makes a missing node fail here, rather than as a
# NameError (or a stale value from an earlier run) in a later cell.
lcsh_data = next(
    (element for element in response.json() if element["@id"] == lcsh_uri), None
)
if lcsh_data is None:
    raise ValueError(f"no record for {lcsh_uri} in the response from url: {url}")

In [None]:
# Alternative labels for the subject. skos:altLabel is optional, so a record
# without any alternative labels gives an empty list instead of a KeyError.
[
    label["@value"]
    for label in lcsh_data.get("http://www.w3.org/2004/02/skos/core#altLabel", [])
]

In [None]:
# Preferred label(s) recorded for the subject under skos:prefLabel.
[entry["@value"] for entry in lcsh_data["http://www.w3.org/2004/02/skos/core#prefLabel"]]

# MeSH (Medical Subject Headings)

In [None]:
# Take the value of the first P486 claim on the entity (used below as the
# MeSH ID). Falls back to None when the claim, or any key along the path, is
# missing.
try:
    mesh_id = wikidata["claims"]["P486"][0]["mainsnak"]["datavalue"]["value"]
except (KeyError, IndexError):
    mesh_id = None

In [None]:
# Show the extracted ID (None when no P486 value could be read).
mesh_id

In [None]:
# Query the MeSH browser API for the record whose "ui" field exactly matches
# `mesh_id`, and keep the "_generated" part of the first hit.
if mesh_id is None:
    # Without this guard the search below would be sent with no usable query.
    raise ValueError("no MeSH ID was found for this entity")

response = httpx.get(
    url="https://meshb.nlm.nih.gov/api/search/record",
    params={
        "searchInField": "ui",
        "sort": "",
        "size": "1",
        "searchType": "exactMatch",
        "searchMethod": "FullWord",
        "q": mesh_id,
    },
)
try:
    generated_response = response.json()["hits"]["hits"][0]["_source"]["_generated"]
except IndexError as error:
    # An empty hit list means nothing matched the ID.
    raise ValueError(f"{mesh_id} is not a valid MeSH ID") from error
except KeyError as error:
    # The reply did not have the expected structure.
    raise ValueError(
        f"something unexpected happened when calling url: {response.url}"
    ) from error
generated_response