In [24]:
import json
import os
import re
import time
from wsgiref import headers

import requests

In [28]:
# Location of the JSON ID file that the rest of the notebook works from
id_path = 'patentsView_ids.json'

# Default to an empty mapping; it is only replaced when the file loads cleanly
id_dictionary = {}

try:
    with open(id_path, 'r') as id_file:
        loaded_ids = json.load(id_file)
except FileNotFoundError:
    # Missing file: report it and keep the empty mapping
    print("JSON file not found.")
except json.JSONDecodeError:
    # Malformed file: report it and keep the empty mapping
    print("Error decoding JSON data.")
else:
    id_dictionary = loaded_ids

In [29]:
def get_dictionary(work):
    """Flatten one patent record from the API response into the notebook's schema.

    Parameters
    ----------
    work : dict
        A single patent record. The top-level ``patent_*`` keys read below are
        required (a missing one raises ``KeyError``); the nested lists
        ``application``, ``inventors``, ``assignees`` and
        ``us_related_documents`` are optional and default to empty lists.

    Returns
    -------
    dict
        A new dictionary with renamed keys, tagged with ``"api": "patentsView"``.

    Raises
    ------
    KeyError
        If a required key is missing from ``work`` or from one of its nested
        entries.
    """
    # Use the argument itself. The previous body read a global named `patent`
    # and therefore only worked when the caller's loop variable had that name.
    patent = work

    # The nested "inventor" / "assignee" values are sliced to keep only the ID.
    # 47 == len("https://search.patentsview.org/api/v1/inventor/") (same length
    # for ".../assignee/"), and the -1 drops one trailing character.
    # NOTE(review): presumably these values are API links ending in "/" -- confirm.
    link_prefix_len = 47

    result = {
        "id": patent["patent_id"],
        "earliest_date": patent["patent_earliest_application_date"],
        "abstract": patent["patent_abstract"],
        "created_date": patent["patent_date"],
        "cited_count": patent["patent_num_total_documents_cited"],
        "cited_by_count": patent["patent_num_times_cited_by_us_patents"],
        "title": patent["patent_title"],
        "type": patent["patent_type"],
        "application": [
            {
                "application_id": application["application_id"],
                "application_type": application["application_type"],
                "filing_type": application["filing_type"],
                "filing_date": application["filing_date"],
            }
            for application in patent.get("application", [])
        ],
        "inventors": [
            {
                "id": inventor["inventor"][link_prefix_len:-1],
                "name_first": inventor["inventor_name_first"],
                "name_last": inventor["inventor_name_last"],
                "position": inventor["inventor_sequence"],
            }
            for inventor in patent.get("inventors", [])
        ],
        "assignees": [
            {
                "id": assignee["assignee"][link_prefix_len:-1],
                "name_first": assignee["assignee_individual_name_first"],
                "name_last": assignee["assignee_individual_name_last"],
                "organization": assignee["assignee_organization"],
            }
            for assignee in patent.get("assignees", [])
        ],
        "year": patent["patent_year"],
        "related_documents": [
            {
                "id": document["related_doc_number"],
                "type": document["related_doc_type"],
            }
            for document in patent.get("us_related_documents", [])
        ],
        "us_term_of_grant": patent.get("us_term_of_grant", []),
        "api": "patentsView",
    }

    return result


In [30]:
# Base URL for the patentsView API
base_query_url_patent = "https://search.patentsview.org/api/v1/patent/"

# Read the API token from the environment so the secret is never stored in the notebook
api_key = os.environ.get("PATENTSVIEW_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the PATENTSVIEW_API_KEY environment variable to your patentsView API key."
    )

headers = {
    'accept': 'application/json',
    'X-Api-Key': api_key,
}

# Fields requested for every patent
patent_fields = [
    "application.application_id",
    "application.application_type",
    "application.filing_type",
    "application.filing_date",
    "inventors.inventor_id",
    "inventors.inventor_name_last",
    "inventors.inventor_name_first",
    "inventors.inventor_sequence",
    "assignees.assignee_id",
    "assignees.assignee_individual_name_first",
    "assignees.assignee_individual_name_last",
    "assignees.assignee_organization",
    "patent_abstract",
    "patent_date",
    "patent_earliest_application_date",
    "patent_id",
    "patent_num_times_cited_by_us_patents",
    "patent_num_total_documents_cited",
    "patent_title",
    "patent_type",
    "patent_year",
    "us_related_documents.related_doc_number",
    "us_related_documents.related_doc_type",
    "us_term_of_grant.disclaimer_date",
]

# Tunable query settings
PAGE_SIZE = 100                # same size for every page (was 100, then 25)
MAX_QUERIES_PER_WINDOW = 45    # self-imposed request budget per window
WINDOW_SECONDS = 60
REQUEST_TIMEOUT_SECONDS = 30   # never hang forever on a stalled connection
MAX_RATE_LIMIT_RETRIES = 3     # consecutive HTTP 429 retries before giving up on a page

# Initialize a dictionary to store the converted patents, keyed by patent ID
works_dictionary = {}

# Initialize a set to store the failed queries as (individual_id, after) pairs
failed_queries = set()

# Start a timer to track the total query time
start_time = time.time()

# Rate-limit bookkeeping: the window start is reset each time the budget is renewed
window_start = start_time
queries_made = 0

for display_name in id_dictionary:
    for individual_id in id_dictionary[display_name]:
        # The pagination cursor belongs to this inventor ID only
        after = None
        rate_limit_retries = 0

        # Every patent found for this ID is listed here, including patents that
        # were already stored while processing a different inventor
        works = id_dictionary[display_name][individual_id].setdefault("works", [])
        known_works = set(works)

        query_params = {
            "q": json.dumps({"inventors.inventor_id": individual_id}),
            "f": json.dumps(patent_fields, separators=(",", ":")),
            "o": json.dumps({"size": PAGE_SIZE}),
        }

        while True:
            try:
                # Throttle before sending. The wait is computed from the current
                # window, so it can never be negative.
                elapsed = time.time() - window_start
                if elapsed >= WINDOW_SECONDS:
                    window_start = time.time()
                    queries_made = 0
                elif queries_made >= MAX_QUERIES_PER_WINDOW:
                    time.sleep(WINDOW_SECONDS - elapsed)
                    window_start = time.time()
                    queries_made = 0

                response = requests.get(
                    base_query_url_patent,
                    headers=headers,
                    params=query_params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                # Count every request sent, whether or not it succeeded
                queries_made += 1

                if response.status_code == 429 and rate_limit_retries < MAX_RATE_LIMIT_RETRIES:
                    # Rate limited by the server: wait and retry the same page
                    rate_limit_retries += 1
                    retry_after = float(response.headers.get("Retry-After", WINDOW_SECONDS))
                    time.sleep(max(0.0, retry_after))
                    continue

                # Any other error status is a failed query; never fall through
                # and reuse the previous page's results
                response.raise_for_status()
                rate_limit_retries = 0

                patents = response.json().get("patents") or []

                for patent in patents:
                    patent_data = get_dictionary(patent)
                    patent_id = patent_data["id"]

                    if patent_id not in works_dictionary:
                        works_dictionary[patent_id] = patent_data

                    if patent_id not in known_works:
                        known_works.add(patent_id)
                        works.append(patent_id)

                # A short (or empty) page means there is nothing left to fetch
                if len(patents) < PAGE_SIZE:
                    break

                # NOTE(review): the cursor assumes results are ordered by
                # patent_id -- confirm against the API's default sort.
                after = patents[-1]["patent_id"]
                query_params["o"] = json.dumps({"size": PAGE_SIZE, "after": after})

            except Exception as e:
                # Best effort: record where this ID stopped and move on to the next one
                print(f"{display_name} {individual_id} failed after {after!r}: {e!r}")
                failed_queries.add((individual_id, after))
                break

        print(display_name + " " + individual_id + " done")

print(f"Failed queries: {failed_queries}")


Derek Abbott fl:de_ln:abbott-3 done
Evan Abel fl:ev_ln:abel-1 done
Failed queries: set()


In [33]:
# Write the three result files, in order: converted patents, failed queries
# (the set is turned into a list so it can be serialized), and the ID mapping.
output_payloads = (
    ("patentsView_final_works.json", works_dictionary),
    ("patentsView_failed_work_queries.json", list(failed_queries)),
    ("patentsView_final_ids.json", id_dictionary),
)

for output_path, payload in output_payloads:
    with open(output_path, "w") as output_file:
        json.dump(payload, output_file, indent=4)
