In [1]:
import dimcli
from dimcli.utils import *
import re
import psycopg2

import os, sys, time, json
from tqdm.notebook import tqdm as progressbar

import pandas as pd

import plotly.express as px
from plotly.offline import plot
if 'google.colab' not in sys.modules:
    # Make the JS dependencies local; needed by HTML exports.
    from plotly.offline import init_notebook_mode
    init_notebook_mode(connected=True)

print("==\nLogging in..")
# https://digital-science.github.io/dimcli/getting-started.html#authentication
ENDPOINT = "https://app.dimensions.ai"

# SECURITY: the API key must never live in the notebook source. It is read from
# the DIMENSIONS_API_KEY environment variable; when that is unset (e.g. on
# Colab) the user is prompted instead, so the key is never echoed or saved.
# Any key that was previously committed in this cell should be treated as
# compromised and rotated.
KEY = os.environ.get("DIMENSIONS_API_KEY")
if not KEY:
    import getpass
    KEY = getpass.getpass(prompt='API Key: ')
dimcli.login(key=KEY, endpoint=ENDPOINT)
dsl = dimcli.Dsl()

==
Logging in..
[2mDimcli - Dimensions API Client (v1.2)[0m
[2mConnected to: <https://app.dimensions.ai/api/dsl> - DSL v2.8[0m
[2mMethod: manual login[0m


In [2]:
# Path to the JSON file with the people records used by the later cells.
id_path = 'asci_aap_dataJSON.json'

# Bind the name up front so it exists (as an empty dict) even if loading fails.
id_dictionary = {}

try:
    # JSON is UTF-8 by specification; pass the encoding explicitly so names
    # with non-ASCII characters are not mangled by a platform default codec.
    with open(id_path, 'r', encoding='utf-8') as json_file:
        id_dictionary = json.load(json_file)
except FileNotFoundError:
    # Best effort: report the problem and keep the empty dictionary.
    print("JSON file not found.")
except json.JSONDecodeError:
    print("Error decoding JSON data.")

In [3]:
def execute_command(query, params=None):
    """Execute one SQL statement on a fresh connection and commit it.

    The connection string is read from the ``DATABASE_URL`` environment
    variable so that database credentials are never stored in the notebook.
    Any credentials that were previously embedded here should be rotated.

    Args:
        query: SQL text to execute.
        params: Optional sequence or mapping of values bound to ``%s`` /
            ``%(name)s`` placeholders in ``query``. When omitted, ``query`` is
            sent as-is (literal ``%`` characters are not interpreted).

    Raises:
        RuntimeError: if ``DATABASE_URL`` is not set.

    Database errors (``psycopg2.Error``) are not raised: the failing query and
    the error are printed and the function returns normally.
    """
    dsn = os.environ.get("DATABASE_URL")
    if not dsn:
        # Fail fast on misconfiguration instead of printing one connection
        # error per statement.
        raise RuntimeError(
            "DATABASE_URL is not set; export the PostgreSQL/CockroachDB "
            "connection string before running this notebook."
        )

    connection = None
    try:
        connection = psycopg2.connect(dsn)
        # The cursor context manager closes the cursor even if execute() fails.
        with connection.cursor() as cursor:
            cursor.execute(query, params)
        connection.commit()  # Commit the transaction
    except psycopg2.Error as error:
        print(f"Error: {query}")
        print("------")
        print(error)
    finally:
        # Always release the connection; closing without a commit discards
        # the failed transaction.
        if connection is not None:
            connection.close()

In [6]:
# Explicit import (rather than relying on a star import) for the DSL escaping helper.
from dimcli.utils import dsl_escape

# DSL template: full-text search of grants for a quoted name, returning the
# fields that are stored as table columns below.
query_work2_template = """search grants for "{}" return grants [abstract + active_year + concepts + concepts_scores + date_inserted + dimensions_url + end_date + funder_orgs + funding_usd + investigators + keywords + original_title + research_orgs + researchers + score + start_date + title + id]"""

# Columns of every per-person table. This tuple is also the whitelist of record
# fields allowed into an INSERT, so a key coming back from the API can never be
# interpolated into SQL as an arbitrary identifier.
GRANT_COLUMNS = (
    "id",
    "abstract",
    "active_year",
    "concepts",
    "concepts_scores",
    "date_inserted",
    "dimensions_url",
    "end_date",
    "funder_orgs",
    "funding_usd",
    "investigators",
    "keywords",
    "original_title",
    "research_orgs",
    "researchers",
    "score",
    "start_date",
    "title",
)

# Slice of id_dictionary["people"] processed by this run.
BATCH_START, BATCH_END = 20, 35


def _author_query_name(author):
    """Build "First [Middle] Last" from an author record.

    Leading/trailing periods are stripped from each part; parts that are
    missing, None or empty after stripping are left out, so no doubled
    spaces are produced.
    """
    parts = (
        (author.get(key) or "").strip(".")
        for key in ("first_name", "middle_name", "last_name")
    )
    return " ".join(part for part in parts if part)


def _table_name(author_name):
    """Turn a display name into a SQL-safe table identifier.

    Every character outside [a-zA-Z0-9_] becomes '_', and a leading digit is
    replaced by '_' because an unquoted identifier cannot start with a digit.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '_', author_name)
    return re.sub(r'^\d', '_', cleaned)


def _create_table_sql(table):
    """Return the CREATE TABLE IF NOT EXISTS statement for one person's table."""
    column_defs = ["id STRING PRIMARY KEY"]
    column_defs += [f"{column} STRING" for column in GRANT_COLUMNS if column != "id"]
    return (
        f"CREATE TABLE IF NOT EXISTS {table} (\n    "
        + ",\n    ".join(column_defs)
        + "\n);"
    )


def _sql_literal(value):
    """Render a value as a single-quoted SQL string literal.

    Non-string values are serialised to JSON first; embedded single quotes
    are doubled.
    """
    if not isinstance(value, str):
        value = json.dumps(value)
    return "'" + value.replace("'", "''") + "'"


def _build_insert(table, record):
    """Return the INSERT statement for one grant record, or None if empty.

    Only fields listed in GRANT_COLUMNS are written. ON CONFLICT (id) DO
    NOTHING makes re-running the cell idempotent instead of producing one
    duplicate-key error per already-stored grant.
    """
    known = [(field, value) for field, value in record.items() if field in GRANT_COLUMNS]
    if not known:
        return None
    columns = ', '.join(field for field, _ in known)
    values_str = ', '.join(_sql_literal(value) for _, value in known)
    return (
        f"INSERT INTO {table} ({columns}) VALUES({values_str}) "
        "ON CONFLICT (id) DO NOTHING;"
    )


for author in id_dictionary["people"][BATCH_START:BATCH_END]:
    # Name used both for the full-text query and (sanitised) as table name.
    author_name = _author_query_name(author)
    if not author_name:
        # Without a name there is nothing to search for and no valid table name.
        print("skipping record without a usable name")
        continue

    cleaned_name = _table_name(author_name)
    execute_command(_create_table_sql(cleaned_name))

    print(f"querying for {author_name}")
    # Escape double quotes so a name cannot terminate the quoted search phrase.
    q = query_work2_template.format(dsl_escape(author_name))

    # Page through all results 100 at a time; only public arguments are passed.
    result = dsl.query_iterative(q, limit=100, pause=1.5)
    result_dict = result.data["grants"]

    for grant in result_dict:
        insert_query = _build_insert(cleaned_name, grant)
        if insert_query:
            execute_command(insert_query)




Starting iteration with limit=100 skip=0 ...[0m


querying for Maria T Abreu


0-100 / 904 (1.98s)[0m
100-200 / 904 (2.07s)[0m
200-300 / 904 (1.46s)[0m
300-400 / 904 (2.42s)[0m
400-500 / 904 (2.13s)[0m
500-600 / 904 (4.17s)[0m
600-700 / 904 (2.48s)[0m
700-800 / 904 (1.92s)[0m
800-900 / 904 (4.04s)[0m
900-904 / 904 (1.05s)[0m
===
Records extracted: 904[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Domenico Accili


0-17 / 17 (0.96s)[0m
===
Records extracted: 17[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Seema Sharma Aceves


===
Records extracted: 0[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Michael J Ackerman


0-19 / 19 (1.08s)[0m
===
Records extracted: 19[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Peter A.J Adam


0-1 / 1 (0.54s)[0m
===
Records extracted: 1[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Christopher M Adams


0-100 / 212 (4.80s)[0m
100-200 / 212 (3.76s)[0m
200-212 / 212 (2.28s)[0m
===
Records extracted: 212[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for John S Adams


0-100 / 522 (3.25s)[0m
100-200 / 522 (1.67s)[0m
200-300 / 522 (4.61s)[0m
300-400 / 522 (1.60s)[0m
400-500 / 522 (3.47s)[0m
500-522 / 522 (5.53s)[0m
===
Records extracted: 522[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Raymond Adams


0-71 / 71 (2.90s)[0m
===
Records extracted: 71[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for William Adams


0-100 / 601 (1.65s)[0m
100-200 / 601 (4.87s)[0m
200-300 / 601 (1.37s)[0m
300-400 / 601 (2.68s)[0m
400-500 / 601 (1.79s)[0m
500-600 / 601 (7.16s)[0m
600-601 / 601 (2.02s)[0m
===
Records extracted: 601[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for John W Adamson


0-20 / 20 (0.91s)[0m
===
Records extracted: 20[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Eli Adashi


0-9 / 9 (0.55s)[0m
===
Records extracted: 9[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Robert S Adelstein


0-7 / 7 (0.59s)[0m
===
Records extracted: 7[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Siamak A Adibi


0-4 / 4 (0.45s)[0m
===
Records extracted: 4[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for N Franklin Adkinson


0-13 / 13 (2.14s)[0m
===
Records extracted: 13[0m
Starting iteration with limit=100 skip=0 ...[0m


querying for Sheldon Adler


0-7 / 7 (0.76s)[0m
===
Records extracted: 7[0m
