In [2]:
import dimcli
from dimcli.utils import *
import re
import psycopg2

import os, sys, time, json
from tqdm.notebook import tqdm as progressbar

import pandas as pd

import plotly.express as px
from plotly.offline import plot

# Outside Google Colab: make JS dependencies local (needed by HTML exports).
if 'google.colab' not in sys.modules:
    from plotly.offline import init_notebook_mode

    init_notebook_mode(connected=True)

In [3]:
# Path to the JSON file that populates `id_dictionary`.
id_path = 'asci_aap_dataJSON.json'

# Start from an empty dictionary so the name is always defined, even when
# loading fails below (best-effort: failures are reported, not raised).
id_dictionary = {}

try:
    # JSON text is UTF-8; pin the encoding instead of relying on the platform
    # default, which can mis-decode non-ASCII content on some systems.
    with open(id_path, 'r', encoding='utf-8') as json_file:
        id_dictionary = json.load(json_file)
except FileNotFoundError:
    print("JSON file not found.")
except json.JSONDecodeError:
    print("Error decoding JSON data.")

In [23]:
def execute_command(query, database):
    """Run one SQL statement against a CockroachDB database and return its rows.

    Parameters
    ----------
    query : str
        SQL text passed verbatim to ``cursor.execute``.
    database : str
        Database name suffix; either ``patents``, ``grants`` or
        ``clinical_trials``.

    Returns
    -------
    list of tuple or None
        ``cursor.fetchall()`` when the statement produces a result set.
        ``None`` when it produces none, or when psycopg2 reports an error
        (the failing query and the error are printed, not raised).

    Raises
    ------
    RuntimeError
        If the ``COCKROACH_USER`` or ``COCKROACH_PASSWORD`` environment
        variable is missing or empty.
    """
    # Credentials are read from the environment so that no secret is stored
    # in the notebook itself.
    user = os.environ.get("COCKROACH_USER")
    password = os.environ.get("COCKROACH_PASSWORD")
    if not user or not password:
        raise RuntimeError(
            "Set the COCKROACH_USER and COCKROACH_PASSWORD environment "
            "variables before calling execute_command()."
        )

    connection = None
    try:
        # Keyword parameters avoid having to URL-escape the password.
        connection = psycopg2.connect(
            host="livid-dibbler-6457.g8z.cockroachlabs.cloud",
            port=26257,
            dbname=f"livid-dibbler-6457.{database}",
            user=user,
            password=password,
            sslmode="verify-full",
        )

        with connection.cursor() as cursor:
            cursor.execute(query)
            # `description` is None for statements that return no rows
            # (e.g. INSERT), in which case fetchall() would raise.
            if cursor.description is not None:
                result = cursor.fetchall()
            else:
                result = None

        connection.commit()  # Commit the transaction
        return result

    except psycopg2.Error as error:
        print(f"Error: {query}")
        print("------")
        print(error)
        return None

    finally:
        # Always release the connection, including when the query failed.
        if connection is not None:
            connection.close()

In [9]:
def get_counts(author_name):
    """Count the rows in an author's table in each of the three databases.

    The table name is derived from ``author_name``: every character outside
    ``[a-zA-Z0-9_]`` becomes ``_``, and a leading digit is replaced by ``_``.

    Returns a 3-tuple ``(patents, grants, clinical_trials)`` holding the
    ``COUNT(*)`` value from each database, in that order.
    """
    table = re.sub(r'^\d', '_', re.sub(r'[^a-zA-Z0-9_]', '_', author_name))
    count_sql = f"SELECT COUNT(*) FROM {table}"

    # All three queries run before any result is unpacked.
    patents_rows, grants_rows, trials_rows = [
        execute_command(count_sql, db_name)
        for db_name in ("patents", "grants", "clinical_trials")
    ]

    return patents_rows[0][0], grants_rows[0][0], trials_rows[0][0]

print(get_counts("Amy P Abernethy"))

[(0,)]
[(7,)]
[(0,)]
(0, 7, 0)


In [15]:
def get_grant_total(author_name):
    """Sum the ``funding_usd`` column of an author's table in ``grants``.

    The table name is derived from ``author_name``: every character outside
    ``[a-zA-Z0-9_]`` becomes ``_``, and a leading digit is replaced by ``_``.

    Each value is converted with ``float()`` before being added. Rows whose
    ``funding_usd`` is NULL are skipped rather than aborting the whole sum.

    Returns the total as a float (``0.0`` when there are no rows).
    """
    cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '_', author_name)
    cleaned_name = re.sub(r'^\d', '_', cleaned_name)
    total = 0.0

    query = f"SELECT funding_usd FROM {cleaned_name}"

    grants_usd = execute_command(query, "grants")

    for (amount, *_) in grants_usd:
        # A NULL amount arrives as None; float(None) would raise TypeError.
        if amount is None:
            continue
        total += float(amount)

    return total

print(get_grant_total("Amy P Abernethy"))

[('2433915.0',), ('202881536.0',), ('1120157.0',), ('50000.0',), ('300000.0',), ('9640349.0',), ('7175320.0',)]
223601277.0


In [17]:
def get_r01_total(author_name):
    """Count the author's grants whose ``grant_number`` contains ``R01``.

    The table name is derived from ``author_name``: every character outside
    ``[a-zA-Z0-9_]`` becomes ``_``, and a leading digit is replaced by ``_``.
    The query runs against the ``grants`` database.

    Returns the ``COUNT(*)`` value from the first result row.
    """
    table = re.sub(r'^\d', '_', re.sub(r'[^a-zA-Z0-9_]', '_', author_name))

    rows = execute_command(
        f"SELECT COUNT(*) FROM {table} WHERE grant_number LIKE '%R01%'",
        "grants",
    )

    first_row = rows[0]
    return first_row[0]

print(get_r01_total("Amy P Abernethy"))

[(3,)]
3


In [30]:
def get_num_investigators(author_name, database):
    """Count the distinct people listed across all rows of an author's table.

    The table name is derived from ``author_name``: every character outside
    ``[a-zA-Z0-9_]`` becomes ``_``, and a leading digit is replaced by ``_``.

    The column read is ``inventors`` when ``database`` is ``"patents"`` and
    ``investigators`` for any other database. Each cell is parsed as JSON and
    iterated; the first element of every entry is used as that person's
    identity (presumably their name -- confirm against the table schema).

    Returns the number of distinct first elements seen.
    """
    table = re.sub(r'^\d', '_', re.sub(r'[^a-zA-Z0-9_]', '_', author_name))

    people_column = "inventors" if database == "patents" else "investigators"

    rows = execute_command(f"SELECT {people_column} FROM {table}", database)

    # One JSON document per row; flatten them all into a set of identities.
    distinct_people = {
        person[0]
        for row in rows
        for person in json.loads(row[0])
    }

    return len(distinct_people)

print(get_num_investigators("John Alexander", "clinical_trials"))

{'Mark D. Scarupa', 'Alex Reinkensmeyer', 'Eriko Tokunaga', 'Bangwei Cao', 'Ludger Rose', 'Nathan Pennell', 'Dianne Jones', 'George Kannourakis', 'Jasmine Kaur', 'Eric Prystowsky', 'James McCabe', 'Maria Beatrice Bilo', 'Laura Biganzoli', 'Alexander M. Stessin', 'Stephen Harold', 'Kiren Sahni', 'Robert M. Kyler', 'Manuchehr Darani', 'Makoto Nishio', 'Akiyoshi Uchiyama', 'Heike Strohschnitter', 'Matthew P. Deek', 'Gita V. Massey', 'Rohini Bagewadi', 'Rebecca Hahn', 'Melissa Ramos', 'James A. Wallace', 'Basel Altoos', 'Stephen McWilliam', 'Susan M. Chafe', 'Sohail Rao', 'Donald Baril', 'Cuneyt Koksoy', 'Mila Leong', 'John Balbas', 'Sina Porouchani', 'Santiago Gonzalez Santiago', 'Chrisette Dharma', 'Joslyn Walker', 'Filip Palarczyk', 'Alexander L Green', 'Gary Buchschacher Jr', 'Antti Jekunen', 'Patricia C Collins', 'Claudio Zamagni', 'John T Cole', 'Kirsten Leu', 'Alban Baruteau', 'Sawsan Rashdan', 'Junaid Siddiqui', 'Jean-François Rahier', 'Aarati V. Rao', 'Seema Nagpal', 'Sean Mcdermo