Skip to content

Commit

Permalink
manubot#337 Add timeout to zotero searches.
Browse files Browse the repository at this point in the history
  • Loading branch information
xihh87 committed Jun 17, 2022
1 parent 56c5b33 commit dc5d25a
Showing 1 changed file with 28 additions and 12 deletions.
40 changes: 28 additions & 12 deletions manubot/cite/zotero.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,16 +24,20 @@
base_url = "https://translate.manubot.org"
"""URL that provides access to the Zotero translation-server API"""

default_timeout = 3

def web_query(url: str) -> ZoteroData:

def web_query(url: str, timeout_seconds: int = default_timeout) -> ZoteroData:
"""
Return Zotero citation metadata for a URL as a list containing a single element that
is a dictionary with the URL's metadata.
"""
headers = {"User-Agent": get_manubot_user_agent(), "Content-Type": "text/plain"}
params = {"single": 1}
api_url = f"{base_url}/web"
response = requests.post(api_url, params=params, headers=headers, data=str(url))
response = requests.post(
api_url, params=params, headers=headers, data=str(url), timeout=timeout_seconds
)
try:
zotero_data = response.json()
except Exception as error:
Expand All @@ -52,7 +56,7 @@ def web_query(url: str) -> ZoteroData:
return zotero_data


def search_query(identifier: str) -> ZoteroData:
def search_query(identifier: str, timeout_seconds: int = default_timeout) -> ZoteroData:
"""
Retrieve Zotero metadata for a DOI, ISBN, PMID, or arXiv ID.
Example usage:
Expand All @@ -66,7 +70,9 @@ def search_query(identifier: str) -> ZoteroData:
"""
api_url = f"{base_url}/search"
headers = {"User-Agent": get_manubot_user_agent(), "Content-Type": "text/plain"}
response = requests.post(api_url, headers=headers, data=str(identifier))
response = requests.post(
api_url, headers=headers, data=str(identifier), timeout=timeout_seconds
)
try:
zotero_data = response.json()
except Exception as error:
Expand All @@ -93,7 +99,9 @@ def _passthrough_zotero_data(zotero_data: ZoteroData) -> ZoteroData:
return zotero_data


def export_as_csl(zotero_data: ZoteroData) -> CSLItems:
def export_as_csl(
zotero_data: ZoteroData, timeout_seconds: int = default_timeout
) -> CSLItems:
"""
Export Zotero JSON data to CSL JSON using a translation-server /export query.
Performs a similar query to the following curl command:
Expand All @@ -107,7 +115,13 @@ def export_as_csl(zotero_data: ZoteroData) -> CSLItems:
api_url = f"{base_url}/export"
params = {"format": "csljson"}
headers = {"User-Agent": get_manubot_user_agent()}
response = requests.post(api_url, params=params, headers=headers, json=zotero_data)
response = requests.post(
api_url,
params=params,
headers=headers,
json=zotero_data,
timeout=timeout_seconds,
)
if not response.ok:
message = f"export_as_csl: translation-server returned status code {response.status_code}"
logging.warning(f"{message} with the following output:\n{response.text}")
Expand All @@ -120,25 +134,27 @@ def export_as_csl(zotero_data: ZoteroData) -> CSLItems:
return csl_items


def get_csl_item(identifier: str) -> CSLItem:
def get_csl_item(identifier: str, timeout_seconds: int = default_timeout) -> CSLItem:
"""
Use a translation-server search query followed by an export query
to return a CSL Item (the first & only record of the returned CSL JSON).
"""
zotero_data = search_query(identifier)
csl_items = export_as_csl(zotero_data)
zotero_data = search_query(identifier, timeout_seconds=timeout_seconds)
csl_items = export_as_csl(zotero_data, timeout_seconds=timeout_seconds)
(csl_item,) = csl_items
return csl_item


def search_or_web_query(identifier: str) -> ZoteroData:
def search_or_web_query(
identifier: str, timeout_seconds: int = default_timeout
) -> ZoteroData:
"""
Detect whether `identifier` is a URL. If so,
retrieve Zotero metadata using a /web query.
Otherwise, retrieve Zotero metadata using a /search query.
"""
if is_http_url(identifier):
zotero_data = web_query(identifier)
zotero_data = web_query(identifier, timeout_seconds=timeout_seconds)
else:
zotero_data = search_query(identifier)
zotero_data = search_query(identifier, timeout_seconds=timeout_seconds)
return zotero_data

0 comments on commit dc5d25a

Please sign in to comment.