Skip to content

Commit

Permalink
manubot#337 Add timeout for pubmed search.
Browse files Browse the repository at this point in the history
  • Loading branch information
xihh87 committed Jun 17, 2022
1 parent 50375ed commit f4ba734
Showing 1 changed file with 26 additions and 14 deletions.
40 changes: 26 additions & 14 deletions manubot/cite/pubmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from .citekey import CiteKey
from .handlers import Handler

default_timeout = 3


class Handler_PubMed(Handler):

Expand Down Expand Up @@ -59,11 +61,15 @@ def inspect(self, citekey: CiteKey) -> Optional[str]:
"Double check the PMCID."
)

def get_csl_item(self, citekey: CiteKey):
return get_pmc_csl_item(citekey.standard_accession)
def get_csl_item(self, citekey: CiteKey, timeout_seconds: int = default_timeout):
return get_pmc_csl_item(
citekey.standard_accession, timeout_seconds=timeout_seconds
)


def get_pmc_csl_item(pmcid: str) -> Dict[str, Any]:
def get_pmc_csl_item(
pmcid: str, timeout_seconds: int = default_timeout
) -> Dict[str, Any]:
"""
Get the CSL Item for a PubMed Central record by its PMID, PMCID, or
DOI, using the NCBI Citation Exporter API.
Expand All @@ -82,7 +88,7 @@ def get_pmc_csl_item(pmcid: str) -> Dict[str, Any]:


def _get_literature_citation_exporter_csl_item(
database: str, identifier: str
database: str, identifier: str, timeout_seconds: int = default_timeout
) -> Dict[str, Any]:
"""
https://api.ncbi.nlm.nih.gov/lit/ctxp
Expand All @@ -102,7 +108,7 @@ def _get_literature_citation_exporter_csl_item(
params = {"format": "csl", "id": identifier}
headers = {"User-Agent": get_manubot_user_agent()}
url = f"https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/{database}/"
response = requests.get(url, params, headers=headers)
response = requests.get(url, params, headers=headers, timeout=timeout_seconds)
try:
csl_item = response.json()
except Exception as error:
Expand All @@ -122,7 +128,9 @@ def _get_literature_citation_exporter_csl_item(
return csl_item


def get_pubmed_csl_item(pmid: Union[str, int]) -> Dict[str, Any]:
def get_pubmed_csl_item(
pmid: Union[str, int], timeout_seconds: int = default_timeout
) -> Dict[str, Any]:
"""
Query NCBI E-Utilities to create CSL Items for PubMed IDs.
Expand All @@ -134,7 +142,7 @@ def get_pubmed_csl_item(pmid: Union[str, int]) -> Dict[str, Any]:
headers = {"User-Agent": get_manubot_user_agent()}
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
with _get_eutils_rate_limiter():
response = requests.get(url, params, headers=headers)
response = requests.get(url, params, headers=headers, timeout=timeout_seconds)
try:
xml_article_set = ElementTree.fromstring(response.text)
assert isinstance(xml_article_set, ElementTree.Element)
Expand Down Expand Up @@ -282,7 +290,9 @@ def extract_publication_date_parts(article: ElementTree.Element) -> List[int]:
return date_parts


def get_pmcid_and_pmid_for_doi(doi: str) -> Dict[str, str]:
def get_pmcid_and_pmid_for_doi(
doi: str, timeout_seconds: int = default_timeout
) -> Dict[str, str]:
"""
Query PMC's ID Converter API to retrieve the PMCID and PMID for a DOI.
Does not work for DOIs that are in PubMed but not PubMed Central.
Expand All @@ -292,7 +302,7 @@ def get_pmcid_and_pmid_for_doi(doi: str) -> Dict[str, str]:
assert doi.startswith("10.")
params = {"ids": doi, "tool": "manubot"}
url = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/"
response = requests.get(url, params)
response = requests.get(url, params, timeout=timeout_seconds)
if not response.ok:
logging.warning(f"Status code {response.status_code} querying {response.url}\n")
return {}
Expand Down Expand Up @@ -323,7 +333,7 @@ def get_pmcid_and_pmid_for_doi(doi: str) -> Dict[str, str]:
return id_dict


def get_pmid_for_doi(doi: str) -> Optional[str]:
def get_pmid_for_doi(doi: str, timeout_seconds: int = default_timeout) -> Optional[str]:
"""
Query NCBI's E-utilities to retrieve the PMID for a DOI.
"""
Expand All @@ -333,7 +343,7 @@ def get_pmid_for_doi(doi: str) -> Optional[str]:
headers = {"User-Agent": get_manubot_user_agent()}
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
with _get_eutils_rate_limiter():
response = requests.get(url, params, headers=headers)
response = requests.get(url, params, headers=headers, timeout=timeout_seconds)
if not response.ok:
logging.warning(f"Status code {response.status_code} querying {response.url}\n")
return None
Expand All @@ -358,14 +368,16 @@ def get_pmid_for_doi(doi: str) -> Optional[str]:
return id_elem.text


def get_pubmed_ids_for_doi(doi: str) -> Dict[str, str]:
def get_pubmed_ids_for_doi(
doi: str, timeout_seconds: int = default_timeout
) -> Dict[str, str]:
"""
Return a dictionary with PMCID and PMID, if they exist, for the specified
DOI. See https://github.com/manubot/manubot/issues/45.
"""
pubmed_ids = get_pmcid_and_pmid_for_doi(doi)
pubmed_ids = get_pmcid_and_pmid_for_doi(doi, timeout_seconds=timeout_seconds)
if not pubmed_ids:
pmid = get_pmid_for_doi(doi)
pmid = get_pmid_for_doi(doi, timeout_seconds=timeout_seconds)
if pmid:
pubmed_ids["PMID"] = pmid
return pubmed_ids
Expand Down

0 comments on commit f4ba734

Please sign in to comment.