In [1]:
import time
from collections import deque
from typing import (
    Any,
    Generator,
    Iterable,
    Optional,
    TypeVar,
    Union
)

import requests

T = TypeVar("T")

PUG-REST encodes these three pieces of information into a simple one-line **uniform resource locator (URL)**, which consists of the *input*, *operation*, and *output* parts, preceded by a prefix common to all PUG-REST requests.  
Let's build it!

In [2]:
def input_specification(domain: str, namespace: str, identifiers: Union[int, str]) -> str:
    """Assemble the input part of a PUG-REST URL.

    Args:
        domain: First path segment of the input part.
        namespace: Second path segment of the input part.
        identifiers: Third path segment; a single identifier or an
            already-joined string of identifiers.

    Returns:
        The three values separated by forward slashes, in the order
        ``domain/namespace/identifiers``.
    """
    template = "{}/{}/{}"
    return template.format(domain, namespace, identifiers)


def operation_specification(operation: str, property_tags: Optional[str] = None) -> str:
    """Assemble the operation part of a PUG-REST URL.

    Args:
        operation: Name of the operation.
        property_tags: Optional string appended after the operation,
            separated by a forward slash.

    Returns:
        ``operation`` alone when ``property_tags`` is ``None``, otherwise
        ``operation/property_tags``.
    """
    # Only None suppresses the suffix; an empty string still adds the slash.
    if property_tags is not None:
        return "{}/{}".format(operation, property_tags)
    return "{}".format(operation)


def build_url(input_spec: str, operation_spec: str, output_format: str) -> str:
    """Combine the common PUG-REST prefix with the three request parts.

    Args:
        input_spec: Input part of the URL.
        operation_spec: Operation part of the URL.
        output_format: Output part of the URL.

    Returns:
        The prefix followed by the input, operation and output parts,
        all separated by forward slashes.
    """
    prefix = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
    segments = (prefix, input_spec, operation_spec, output_format)
    return "/".join(map("{}".format, segments))

In [3]:
def generate_ids() -> Generator[int, None, None]:
    """Yield the integers 1, 2, 3, ... without ever stopping."""
    current = 0
    while True:
        current += 1
        yield current
        
        
def delay_iterations(ids: Iterable[T], width: float, count: int) -> Generator[T, None, None]:
    """Pass items through while limiting throughput with a sliding time window.

    A timestamp is recorded every time the consumer comes back for the next
    item. As soon as ``count`` timestamps lie within the last ``width``
    seconds, the generator sleeps until the oldest of them leaves the window,
    so that no more than ``count`` items are handed out per ``width`` seconds.

    Args:
        ids: Items to yield unchanged and in order.
        width: Length of the sliding window in seconds.
        count: Maximum number of items handed out within one window;
            expected to be at least 1.

    Yields:
        Each element of ``ids``.
    """
    # Timestamps that are still inside the window, oldest first.
    window = deque()
    for item in ids:
        yield item
        # Execution resumes here once the consumer asks for the next item.
        now = time.monotonic()
        window.append(now)
        # Forget timestamps older than the window; `now` itself always stays.
        while now - width > window[0]:
            window.popleft()
        if len(window) >= count:
            # Wait for the time still left until the oldest timestamp expires,
            # not for the time that has already passed since it.
            remaining = window[0] + width - now
            if remaining > 0:
                time.sleep(remaining)

In [4]:
def join_w_comma(*args: object) -> str:
    """Convert every argument to ``str`` and join the results with commas.

    Returns:
        The comma-separated string; empty when called without arguments.
    """
    return ",".join(str(arg) for arg in args)


def prepare_request(identifiers: Iterable[T], operation: str, property_tags: Optional[list] = None) -> str:
    """Build the complete PUG-REST URL for a group of identifiers.

    Note:
        Reads the module-level variables ``domain``, ``namespace`` and
        ``output``; they must be defined before this function is called.

    Args:
        identifiers: Identifiers to request; joined with commas.
        operation: Name of the operation.
        property_tags: Optional list of tags; joined with commas and
            appended to the operation when given.

    Returns:
        The URL produced by ``build_url``.
    """
    id_list = join_w_comma(*identifiers)
    input_part = input_specification(domain, namespace, id_list)
    if property_tags is not None:
        operation_part = operation_specification(operation, join_w_comma(*property_tags))
    else:
        operation_part = operation_specification(operation)
    return build_url(input_part, operation_part, output)


def request_data(url: str, **params: str) -> Any:
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Address to request.
        **params: Query-string parameters passed on to ``requests.get``.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx or 5xx status.
    """
    response = requests.get(url, params=params)
    # Turn error statuses into exceptions; without this call a caller that
    # catches requests.HTTPError would never see one.
    response.raise_for_status()
    return response.json()
    

def execute_request(url, params):
    """Print ``url`` and fetch it through ``request_data``.

    Args:
        url: Address to request.
        params: Mapping expanded into keyword arguments for ``request_data``.

    Returns:
        Whatever ``request_data`` returns for this URL.
    """
    print(url)
    return request_data(url, **params)

In [5]:
# Request settings. prepare_request reads domain, namespace and output
# directly from module scope, so these names must keep their spelling.
domain, namespace = "compound", "cid"
operation, output = "property", "JSON"
tags = [
    "MolecularFormula",
    "MolecularWeight",
    "IUPACName",
    "CanonicalSMILES",
]
# additional_tags = ["InChI", "XLogP", "HBondDonorCount", "HBondAcceptorCount", "RotatableBondCount", "Volume3D"]

# Maps each requested id to the value returned by execute_request.
results = {}
t_start = time.monotonic()
# Ids come from generate_ids() and pass through delay_iterations with a
# 60.0-second window and a count of 400; the loop ends on the first HTTPError.
for i in delay_iterations(generate_ids(), 60.0, 400):
    url = prepare_request([i], operation, tags)
    try:
        res = execute_request(url, {})
    except requests.HTTPError:
        break
    results[i] = res
t_stop = time.monotonic()
# Wall-clock duration of the whole loop in seconds.
t_run = t_stop - t_start
print(t_run)

https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/1/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/2/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/3/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/4/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/5/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/6/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/7/property/MolecularFormula,MolecularWeight,IUPACName,CanonicalSMILES/JSON
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/8/property/MolecularFormula,Molecul

KeyboardInterrupt: 

In [None]:
# Show everything collected so far, keyed by the id that was requested.
print(results)