
Commit

manubot#337 Add timeout for url search.
xihh87 committed Jun 17, 2022
1 parent e739025 commit 56c5b33
Showing 1 changed file with 22 additions and 9 deletions.
manubot/cite/url.py: 31 changes (22 additions & 9 deletions)
@@ -7,6 +7,8 @@

 CSLItem = Dict[str, Any]

+default_timeout = 3
+

 class Handler_URL(Handler):

@@ -23,11 +25,13 @@ def standardize_prefix_accession(self, accession):
         accession = f"{self.prefix_lower}:{accession}"
         return self.standard_prefix, accession

-    def get_csl_item(self, citekey):
-        return get_url_csl_item(citekey.standard_accession)
+    def get_csl_item(self, citekey, timeout_seconds: int = default_timeout):
+        return get_url_csl_item(
+            citekey.standard_accession, timeout_seconds=timeout_seconds
+        )


-def get_url_csl_item(url: str) -> CSLItem:
+def get_url_csl_item(url: str, timeout_seconds: int = default_timeout) -> CSLItem:
     """
     Get csl_item for a URL trying a sequence of strategies.
@@ -38,7 +42,7 @@ def get_url_csl_item(url: str) -> CSLItem:
     """
     for retriever in url_retrievers:
         try:
-            return retriever(url)
+            return retriever(url, timeout_seconds=timeout_seconds)
         except Exception as error:
             logging.warning(
                 f"Error in {retriever.__name__} for {url} "
@@ -48,22 +52,26 @@
     raise Exception(f"all get_url_csl_item methods failed for {url}")


-def get_url_csl_item_zotero(url: str) -> CSLItem:
+def get_url_csl_item_zotero(
+    url: str, timeout_seconds: int = default_timeout
+) -> CSLItem:
     """
     Use Zotero's translation-server to generate a CSL Item for the specified URL.
     """
     from manubot.cite.zotero import export_as_csl, web_query

     zotero_data = web_query(url)
-    csl_data = export_as_csl(zotero_data)
+    csl_data = export_as_csl(zotero_data, timeout_seconds=timeout_seconds)
     (csl_item,) = csl_data
     if not csl_item.get("URL"):
         # some Zotero translators don't set URL. https://github.com/manubot/manubot/issues/244
         csl_item["URL"] = url
     return csl_item


-def get_url_csl_item_greycite(url: str) -> CSLItem:
+def get_url_csl_item_greycite(
+    url: str, timeout_seconds: int = default_timeout
+) -> CSLItem:
     """
     Uses Greycite which has experiened uptime problems in the past.
     API calls seem to take at least 15 seconds. Browser requests are much
@@ -85,7 +93,10 @@ def get_url_csl_item_greycite(url: str) -> CSLItem:
         "User-Agent": get_manubot_user_agent(),
     }
     response = requests.get(
-        "http://greycite.knowledgeblog.org/json", params={"uri": url}, headers=headers
+        "http://greycite.knowledgeblog.org/json",
+        params={"uri": url},
+        headers=headers,
+        timeout=timeout_seconds,
     )
     response.raise_for_status()
     # Some Greycite responses were valid JSON besides for an error appended
@@ -97,7 +108,9 @@ def get_url_csl_item_greycite(url: str) -> CSLItem:
     return csl_item


-def get_url_csl_item_manual(url: str) -> CSLItem:
+def get_url_csl_item_manual(
+    url: str, timeout_seconds: int = default_timeout
+) -> CSLItem:
     """
     Manually create csl_item for a URL.
     """
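
Taken together, the diff threads a timeout_seconds keyword (defaulting to the new module-level default_timeout of 3 seconds) through get_url_csl_item and each retrieval strategy. A minimal usage sketch, not part of the commit (the URL and the 10-second value are illustrative):

    from manubot.cite.url import get_url_csl_item

    # Allow each retrieval strategy up to 10 seconds instead of the 3-second default.
    csl_item = get_url_csl_item("https://example.com/article", timeout_seconds=10)
    print(csl_item.get("title"))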

0 comments on commit 56c5b33
