In [58]:
import dimcli
from dimcli.utils import *
import re
import psycopg2

import os, sys, time, json
from tqdm.notebook import tqdm as progressbar

import pandas as pd

import plotly.express as px
from plotly.offline import plot
if 'google.colab' not in sys.modules:
    # Make JS dependencies local; needed by HTML exports
    from plotly.offline import init_notebook_mode
    init_notebook_mode(connected=True)

print("==\nLogging in..")
# https://digital-science.github.io/dimcli/getting-started.html#authentication
ENDPOINT = "https://app.dimensions.ai"

# The API key must never be stored in the notebook. It is read from the
# DIMENSIONS_API_KEY environment variable; when that is unset (e.g. on Colab)
# the user is prompted for it interactively instead.
# NOTE(review): a key was previously committed in this cell and should be revoked.
KEY = os.environ.get("DIMENSIONS_API_KEY")
if not KEY:
    import getpass
    KEY = getpass.getpass(prompt='API Key: ')

dimcli.login(key=KEY, endpoint=ENDPOINT)
dsl = dimcli.Dsl()

==
Logging in..
[2mDimcli - Dimensions API Client (v1.2)[0m
[2mConnected to: <https://app.dimensions.ai/api/dsl> - DSL v2.8[0m
[2mMethod: manual login[0m


In [13]:
# Location of the JSON file to load
id_path = 'asci_aap_dataJSON.json'

# Fallback value that is kept whenever the file cannot be read or parsed
id_dictionary = {}

try:
    with open(id_path, 'r') as handle:
        loaded = json.load(handle)
except FileNotFoundError:
    print("JSON file not found.")
except json.JSONDecodeError:
    print("Error decoding JSON data.")
else:
    # Only replace the fallback once parsing has fully succeeded
    id_dictionary = loaded

In [14]:
def execute_command(query, params=None):
    """Run one SQL statement against the patents database and commit it.

    A new connection is opened for the call and is always closed again,
    including when connecting or executing fails.

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query`` by psycopg2. When omitted, ``query`` is
            sent as-is, exactly as before.

    Raises:
        RuntimeError: if the DATABASE_URL environment variable is not set.

    Database errors (``psycopg2.Error``) are not raised: the failing query and
    the error are printed and the function returns normally.
    """
    # Credentials must not live in the notebook; the full connection string
    # (postgresql://user:password@host:port/db?sslmode=verify-full) is taken
    # from the environment.
    # NOTE(review): a password was previously committed here and should be rotated.
    dsn = os.environ.get("DATABASE_URL")
    if not dsn:
        raise RuntimeError(
            "DATABASE_URL is not set; export the database connection string "
            "before running this notebook."
        )

    connection = None
    try:
        connection = psycopg2.connect(dsn)
        # The cursor context manager closes the cursor even if execute() raises
        with connection.cursor() as cursor:
            cursor.execute(query, params)
        connection.commit()  # Commit the transaction
    except psycopg2.Error as error:
        print(f"Error: {query}")
        print("------")
        print(error)
    finally:
        # Closing without a commit discards any half-finished transaction
        if connection is not None:
            connection.close()

In [60]:
# Dimensions DSL template: searches patents by inventor name and returns the
# fields that are stored below. `{}` is replaced with the author's name.
query_work2_template = """search patents in inventors for "{}" return patents [abstract + application_number + assignee_names + assignees + associated_grant_ids + cited_by_ids + claims_amount + current_assignee_names + current_assignees + date + date_inserted + dimensions_url + expiration_date + federal_support + filing_date + filing_status + funders + granted_date + granted_year + id + inventor_names + inventors + kind + priority_date + priority_year + publication_date + publication_ids + publication_year + publications + reference_ids + researchers + score + times_cited + title + year + category_rcdc]"""


def _author_query_name(author):
    """Return "First [Middle] Last" for a person record.

    Leading/trailing dots are stripped from each part (e.g. "M." -> "M").
    Parts that are missing or empty after stripping are left out, so no
    doubled or leading spaces end up in the search string.
    """
    parts = (
        (author.get(key) or "").strip(".")
        for key in ("first_name", "middle_name", "last_name")
    )
    return " ".join(part for part in parts if part)


def _table_name_for(author_name):
    """Turn an author name into a table identifier.

    Every character outside [a-zA-Z0-9_] becomes "_", and a leading digit is
    replaced by "_".
    """
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '_', author_name)
    return re.sub(r'^\d', '_', cleaned)


def _sql_string_literal(value):
    """Render a value as a single-quoted SQL string literal.

    Non-string values (lists, dicts, numbers, ...) are serialised to JSON
    first; embedded single quotes are doubled.
    """
    if not isinstance(value, str):
        value = json.dumps(value)
    return "'" + value.replace("'", "''") + "'"


def _insert_statement(table_name, patent):
    """Build the INSERT for one patent record (a field -> value mapping).

    ON CONFLICT (id) DO NOTHING makes re-running the cell skip rows that are
    already stored instead of reporting a duplicate-key error for each.
    """
    columns = ', '.join(patent.keys())
    values_str = ', '.join(_sql_string_literal(value) for value in patent.values())
    return (
        f"INSERT INTO {table_name} ({columns}) VALUES({values_str}) "
        f"ON CONFLICT (id) DO NOTHING;"
    )


for author in id_dictionary["people"][26:35]:

    # Name used both for the Dimensions search and (sanitised) as table name
    author_name = _author_query_name(author)
    cleaned_name = _table_name_for(author_name)

    # One table per author; every column is stored as text.
    execute_command(f"""CREATE TABLE IF NOT EXISTS {cleaned_name} (
    id STRING PRIMARY KEY,
    abstract STRING,
    application_number STRING,
    assignee_names STRING,
    assignees STRING,
    associated_grant_ids STRING,
    cited_by_ids STRING,
    claims_amount STRING,
    current_assignee_names STRING,
    current_assignees STRING,
    category_rcdc STRING,
    date STRING,
    date_inserted STRING,
    dimensions_url STRING,
    expiration_date STRING,
    federal_support STRING,
    filing_date STRING,
    filing_status STRING,
    funders STRING,
    granted_date STRING,
    granted_year STRING,
    inventor_names STRING,
    inventors STRING,
    kind STRING,
    priority_date STRING,
    priority_year STRING,
    publication_date STRING,
    publication_ids STRING,
    publication_year STRING,
    publications STRING,
    reference_ids STRING,
    researchers STRING,
    score STRING,
    times_cited STRING,
    title STRING,
    year STRING
);""")

    # The query returns cited_by_ids, which the table definition used to lack;
    # tables created before that column was added are upgraded here so inserts
    # carrying the field do not fail.
    execute_command(
        f"ALTER TABLE {cleaned_name} ADD COLUMN IF NOT EXISTS cited_by_ids STRING;"
    )

    print(f"querying for {author_name}")
    q = query_work2_template.format(author_name)

    # Only public arguments are passed; the underscore-prefixed ones are
    # internal to dimcli and keep their defaults.
    result = dsl.query_iterative(q, show_results=None, limit=500, skip=0, pause=1.5, force=False, maxlimit=0, verbose=None)
    patents = result.data.get("patents") or []

    for patent in patents:
        execute_command(_insert_statement(cleaned_name, patent))




Starting iteration with limit=500 skip=0 ...[0m


querying for John S Adams


===
Records extracted: 0[0m
Starting iteration with limit=500 skip=0 ...[0m


querying for Raymond Adams


0-8 / 8 (0.45s)[0m
===
Records extracted: 8[0m


Error: INSERT INTO Raymond_Adams (abstract, application_number, assignee_names, assignees, claims_amount, date, date_inserted, dimensions_url, filing_date, filing_status, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, reference_ids, score, times_cited, title, year) VALUES('<p num="0000">A medical port has a housing forming an interior with a fluid inlet and a fluid channel extending from the fluid inlet. The housing also has an exterior wall forming at least one radial opening to the interior. In addition to the housing, the medical port also has a resilient valve element within the housing interior configured to control fluid flow through the inlet, and a biasing element movably couplable with the housing. The resilient valve element has a proximate body portion proximate to the radial opening when in the closed mode. The biasing element also has a constraining portion configured to cooperate with the radial opening to contact at

Starting iteration with limit=500 skip=0 ...[0m


Error: INSERT INTO Raymond_Adams (application_number, assignee_names, claims_amount, current_assignee_names, date, date_inserted, dimensions_url, expiration_date, filing_date, filing_status, granted_date, granted_year, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, score, times_cited, title, year) VALUES('DE4222178A', '["Werzalit AG and Co KG"]', '2', '["Werzalit AG and Co KG"]', '1992-07-06', '1994-12-15', 'https://app.dimensions.ai/details/patent/DE-4222178-C2', '2012-07-07', '1992-07-06', 'Grant', '1994-12-15', '1994', 'DE-4222178-C2', '["HENKE HERMANN", "GARGES LINFORD D", "ADAMS RAYMOND V"]', '[{"country": "DE", "name": "HENKE HERMANN"}, {"country": "US", "name": "GARGES LINFORD D"}, {"country": "US", "name": "ADAMS RAYMOND V"}]', 'C2', '1992-07-06', '1992', '1994-12-15', '1994', '4.423444', '0', 'Device for the pretreatment of lignocellulosic chips', '1992');
------
FATAL:  codeProxyRefusedConnection: connection refused

Err

0-256 / 256 (2.01s)[0m
===
Records extracted: 256[0m


Error: INSERT INTO William_Adams (application_number, assignee_names, date, date_inserted, dimensions_url, filing_date, filing_status, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, score, times_cited, title, year) VALUES('ZA200207733A', '["IGT A NEVADA CORP"]', '2002-09-26', '2003-06-13', 'https://app.dimensions.ai/details/patent/ZA-200207733-B', '2002-09-26', 'N/A', 'ZA-200207733-B', '["ADAMS WILLIAM", "DEVILLE CANYON", "JOHNSON BRIAN", "MILLER CHARLES", "MCVITTE MICHAEL", "STANEK JAMES"]', '[{"name": "ADAMS WILLIAM"}, {"name": "DEVILLE CANYON"}, {"name": "JOHNSON BRIAN"}, {"name": "MILLER CHARLES"}, {"name": "MCVITTE MICHAEL"}, {"name": "STANEK JAMES"}]', 'B', '2001-09-27', '2001', '2003-06-13', '2003', '2.8210034', '0', 'Method, apparatus and system for gaming using a rotatable payout indicator.', '2002');
------
FATAL:  codeProxyRefusedConnection: connection refused

Error: INSERT INTO William_Adams (abstract, application_num

Error: INSERT INTO William_Adams (abstract, application_number, assignee_names, claims_amount, date, date_inserted, dimensions_url, filing_date, filing_status, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, reference_ids, score, times_cited, title, year) VALUES('<p num="0000">High temperature radio frequency identification (RFID) tags are formed from nesting insulative ceramic structures with a woven cladding provided around an RFID tag. Additional interstitial woven cladding may be positioned between the ceramic structures. The layered approach provides sufficient insulation that allows sustained operation at temperatures above 500-600 degrees centigrade (500-600&#176;C) while being sufficiently transparent to radio frequency (RF) signals to allow interrogation and response for track and trace purposes even at such elevated temperatures.</p>', 'US2019/065893', '["TECH ROI LLC"]', '12', '2019-12-12', '2020-07-02', 'https://app.dim

Error: INSERT INTO William_Adams (abstract, application_number, assignee_names, claims_amount, date, date_inserted, dimensions_url, filing_date, filing_status, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, score, times_cited, title, year) VALUES('<p>A method for interacting with a user through a smart watch can include displaying an interactive image. The method can further include detecting a contact moving continuously from a first edge of the touch-sensitive display toward a second, opposite edge of the touch-sensitive display. In response to the detection, the touch-sensitive display can display select function indicators in accordance with movement of the contact. The select function indicators can comprise four or fewer distinct, selectable icons.</p>', 'US2014051295W', '["I AM PLUS LLC"]', '1', '2014-08-15', '2015-02-19', 'https://app.dimensions.ai/details/patent/WO-2015023955-A2', '2014-08-15', 'Application', 'WO-20150239

Error: INSERT INTO William_Adams (abstract, application_number, assignee_names, claims_amount, date, date_inserted, dimensions_url, filing_date, filing_status, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, reference_ids, score, times_cited, title, year) VALUES('<p>A system and method for inspecting an object. The system has a source for generating a penetrating radiation beam for irradiating the object and at least one detector for detecting the beam after the beam interacts with the object. Furthermore, the system has an aperture interposed between the source and the object, the aperture characterized by a cross-sectional dimension, and the cross-sectional dimension capable of variation on a periodic basis during the course of inspecting the object such that high resolution is obtained for regions of object characterized by lower opacity while penetration is achieved at lower spatial resolution for regions of more substantial op

Error: INSERT INTO William_Adams (application_number, assignee_names, claims_amount, current_assignee_names, date, date_inserted, dimensions_url, expiration_date, filing_date, filing_status, granted_date, granted_year, id, inventor_names, inventors, kind, priority_date, priority_year, publication_date, publication_year, reference_ids, score, times_cited, title, year) VALUES('US29711973', '["Dragon Summit Group Inc"]', '1', '["Dragon Summit Group Inc"]', '2019-11-04', '2022-06-14', 'https://app.dimensions.ai/details/patent/US-D954682-S1', '2037-06-14', '2019-11-04', 'Grant', '2022-06-14', '2022', 'US-D954682-S1', '["AI YU", "ADAMS WILLIAM", "GOH CHEE WEI JAREN"]', '[{"country": "CN", "name": "AI YU"}, {"country": "US", "name": "ADAMS WILLIAM"}, {"country": "SG", "name": "GOH CHEE WEI JAREN"}]', 'S1', '2019-06-06', '2019', '2022-06-14', '2022', '["CN-304641938-S", "KR-300939970-S", "US-D907009-S", "CN-304821432-S", "US-20190098985-A1", "US-D705532-S", "US-D604501-S", "US-20080090622-A1",

KeyboardInterrupt: 

In [42]:
# Peek at the people records at positions 25 through 29
people_preview = id_dictionary["people"][25:30]
print(people_preview)

[{'year': '2013', 'first_name': 'Christopher', 'middle_name': 'M.', 'last_name': 'Adams', 'phone': 'False', 'email': 'adams.christopher2@mayo.edu', 'affiliation': "['Mayo Clinic', 'University of Iowa Carver College of Medicine']", 'original specialization': "['Endocrinology', 'Internal Medicine']", 'modified specialization': "['Endocrinology', 'Internal Medicine']", 'unactive': 'False', 'organization': 'False', 'email_affiliation': "['ASCI']", 'umbrella_aff': "['mayo']", 'related_aff': '[]', 'umbrella_spec': '[]', 'related_spec': "['Internal Medicine']", 'id_num': "['Cardiovascular Disease', 'Informatics', 'Critical Care Medicine', 'Endocrinology', 'Diabetes', 'Metabolism', 'Gastroenterology', 'Geriatrics', 'Gerontology', 'Hematology', 'Medical Oncology', 'Breast cancer', 'Neuro-oncology', 'Pulmonology', 'Rheumatology', 'Allergy', 'Bone marrow transplantation', 'Cardiology', 'General Medicine', 'Hepatology', 'Infectious Disease', 'Medicine', 'Nephrology']", 'kumu_num': 'P26'}, {'year':

In [None]:
Skipped people at indices 25, 26, 27, 28, 29.

search researchers for "\"Satoko Shimazaki\"" return researchers

In [52]:
# Rebuild the patent query for whatever `author_name` currently holds
q = query_work2_template.format(author_name)

# Count the researcher records Dimensions returns for this exact name
researcher_query = 'search researchers for "John S Adams" return researchers'
result = dsl.query_iterative(
    researcher_query,
    show_results=None,
    limit=500,
    skip=0,
    pause=1.5,
    force=False,
    maxlimit=0,
    verbose=None,
    _tot_count_prev_query=0,
    _warnings_tot=None,
)
researcher_count = len(result["researchers"])
print(researcher_count)


Starting iteration with limit=500 skip=0 ...[0m
0-39 / 39 (0.37s)[0m
===
Records extracted: 39[0m


39
