In [11]:
import json
import os
import time
from wsgiref import headers

import requests

In [2]:
# Path to the JSON file to load into `people_dictionary`.
# The query cell further down iterates people_dictionary["people"] and reads
# "first_name" / "last_name" from every entry.
asci_aap_path = 'asci_aap_dataJSON.json'

# Start from an empty dictionary so the name is always defined, even when loading fails
people_dictionary = {}

try:
    # Decode explicitly as UTF-8 (the JSON default) instead of relying on the
    # platform's locale encoding, which can mangle names with non-ASCII characters.
    with open(asci_aap_path, 'r', encoding='utf-8') as json_file:
        # Load the JSON data into the dictionary
        people_dictionary = json.load(json_file)
except FileNotFoundError:
    # Best effort: report the problem and keep the empty dictionary
    print("JSON file not found.")
except json.JSONDecodeError:
    print("Error decoding JSON data.")

In [13]:
# Build `ids_dictionary`: the candidate PatentsView inventor records found for each
# physician-scientist in people_dictionary["people"].
# Key:
# - "{first_name} {last_name}" (dict): a dict of dictionaries of possible inventors keyed on id
#     Each dictionary contains:
#     - "id" (str): patentsView inventor id
#     - "counts_by_year" (list): a list of dictionaries documenting activity of inventor
#         Each dictionary contains:
#         - "year" (int): the year
#         - "works_count" (int): inventor's patent count that year
#     - "name_first" (str): inventor's first name
#     - "name_last" (str): inventor's last name
#     - "gender": value of the API's "inventor_male_flag" field
#     - "last_known_state" (str): inventor's last known state
#     - "last_known_country" (str): inventor's last known country
#     - "created_date" (str): inventor's first seen date
#         NOTE(review): "inventor_first_seen_date" is not in the requested field list
#         below, so this is presumably always None - confirm the field name against the
#         API docs before adding it to INVENTOR_FIELDS.
#     - "works_count" (int): inventor's total number of patents
#     - "api" (str): always "patentsView"
#     - "works" (list): list of ids to be populated
#
# Requests that fail are recorded in `failed_queries` as (query_name, after) tuples,
# where `after` is the pagination cursor in effect when the failure happened
# (None for the first page).

# Base URL for the patentsView API
base_query_url_inventor = 'https://search.patentsview.org/api/v1/inventor/'

# Read the API key from the environment instead of committing it to the notebook.
# The key that used to be hard-coded here should be treated as leaked and rotated.
api_key = os.environ.get("PATENTSVIEW_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the PATENTSVIEW_API_KEY environment variable to your PatentsView API key."
    )

headers = {
    'accept': 'application/json',
    'X-Api-Key': api_key,
}

# Tunables
PAGE_SIZE = 25                 # records requested per page
MAX_QUERIES_PER_WINDOW = 45    # request budget per rate-limit window
RATE_WINDOW_SECONDS = 60       # length of the rate-limit window
REQUEST_TIMEOUT_SECONDS = 30   # never let a single request hang the notebook
MAX_RATE_LIMIT_RETRIES = 5     # consecutive HTTP 429 responses tolerated per page

# Fields requested from the inventor endpoint (JSON-encoded list, as the API expects)
INVENTOR_FIELDS = '["inventor_id","inventor_name_last","inventor_name_first","inventor_male_flag","inventor_lastknown_state","inventor_lastknown_country","inventor_num_patents","inventor_years"]'

# Initialize a dictionary to store the queried IDs for each physician-scientist
ids_dictionary = {}

# Initialize a set to store the failed queries
failed_queries = set()

# `start_time` marks the start of the whole run; `window_start` / `queries_made`
# track the current rate-limit window.
start_time = time.time()
window_start = start_time
queries_made = 0

for person in people_dictionary["people"]:
    after = None

    # Get the person's first and last name
    first_name = person["first_name"]
    last_name = person["last_name"]

    # The combined name is the key under which this person's candidates are stored
    query_name = first_name + " " + last_name

    # Query parameters for the first page; "o" is replaced for every following page
    query_params = {
        "q": json.dumps({
            "_and": [
                {"inventor_name_first": first_name},
                {"inventor_name_last": last_name}
            ]
        }),
        "f": INVENTOR_FIELDS,
        "o": json.dumps({"size": PAGE_SIZE}),
    }

    rate_limit_retries = 0

    # Page through the results until a short page is returned or a request fails
    while True:
        try:
            # Client-side throttle: stay within the per-window request budget
            elapsed = time.time() - window_start
            if elapsed >= RATE_WINDOW_SECONDS:
                window_start = time.time()
                queries_made = 0
            elif queries_made >= MAX_QUERIES_PER_WINDOW:
                time.sleep(RATE_WINDOW_SECONDS - elapsed)
                window_start = time.time()
                queries_made = 0

            # Make the API request; every request counts against the budget
            response = requests.get(
                base_query_url_inventor,
                headers=headers,
                params=query_params,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            queries_made += 1

            # Rate limited by the server: wait as instructed, then retry the same page
            if response.status_code == 429 and rate_limit_retries < MAX_RATE_LIMIT_RETRIES:
                rate_limit_retries += 1
                retry_after = response.headers.get("Retry-After")
                if retry_after and retry_after.isdigit():
                    wait_seconds = int(retry_after)
                else:
                    wait_seconds = RATE_WINDOW_SECONDS
                time.sleep(wait_seconds)
                window_start = time.time()
                queries_made = 0
                continue

            # Any other unsuccessful response is recorded and ends this person's query.
            # (Previously this fell through and reused `inventors` from an earlier
            # response, which could loop forever.)
            if response.status_code != 200:
                print(f"{query_name}: HTTP {response.status_code}")
                failed_queries.add((query_name, after))
                break

            rate_limit_retries = 0

            # Get the list of inventors from the response (missing/null -> empty page)
            inventors = response.json().get("inventors") or []

            # Add every returned inventor to this person's candidate dictionary
            for inventor in inventors:
                inventor_id = inventor["inventor_id"]
                inventor_data = {
                    "id": inventor_id,
                    "counts_by_year": [
                        {
                            "year": each["year"],
                            "works_count": each["num_patents"],
                        }
                        # Tolerate a missing or null "inventor_years" entry
                        for each in (inventor.get("inventor_years") or [])
                    ],
                    "name_first": inventor.get("inventor_name_first"),
                    "name_last": inventor.get("inventor_name_last"),
                    "gender": inventor.get("inventor_male_flag"),
                    "last_known_state": inventor.get("inventor_lastknown_state"),
                    "last_known_country": inventor.get("inventor_lastknown_country"),
                    "created_date": inventor.get("inventor_first_seen_date"),
                    "works_count": inventor.get("inventor_num_patents"),
                    "api": "patentsView",
                    "works": [],
                }
                ids_dictionary.setdefault(query_name, {})[inventor_id] = inventor_data

            # A page shorter than PAGE_SIZE (including an empty one) is the last page
            if len(inventors) < PAGE_SIZE:
                break

            # Continue after the last record of this page
            after = inventors[-1]["inventor_id"]
            query_params["o"] = json.dumps({"size": PAGE_SIZE, "after": after})

        # Network errors, malformed responses, etc.: record the query and move on
        except Exception as e:
            print(f"{query_name}: {e}")
            failed_queries.add((query_name, after))
            break

    print(query_name + " done")

# Print the failed queries
print(f"Failed queries: {failed_queries}")

Kjersti Aagaard done
Derek Abbott done
Francois Abboud done
Hanna Abboud done
Omar Abdel-Wahab done
Sarki Abdulkadir done
E. Abel done


KeyboardInterrupt: 

In [14]:
def _write_json(target_path, payload):
    """Serialize `payload` as 4-space-indented JSON into `target_path`, overwriting it."""
    with open(target_path, "w") as out_file:
        json.dump(payload, out_file, indent=4)


# Persist the candidate inventors collected for every queried name
_write_json("patentsView_ids.json", ids_dictionary)

# A set is not JSON-serializable, so the failed queries are written as a list
_write_json("patentsView_failed_queries.json", list(failed_queries))
