## Get extra data from openFDA

For each device in the initial database, query the openFDA API to pull two additional variables: the clearance type and the device class.

In [1]:
import pandas as pd
import numpy as np
import requests, json, time, warnings

In [2]:
# openFDA rate limits (https://open.fda.gov/apis/authentication/), with no API key:
#   - 240 requests per minute, per IP address
#   - 1,000 requests per day, per IP address
# Play nicely with the FDA API: wait one per-minute slot plus a 5% margin.
NICE_DELAY = (60 / 240) * 1.05

# Device table written by the last notebook
DAT_SRC = 'fda_approved_devices.csv'
df = pd.read_csv(DAT_SRC)

In [3]:
# Use the Open FDA API: https://open.fda.gov/apis/device/510k/
def get_openfda_data(id):
    '''Fetch the first openFDA 510(k) record matching a submission number.

    Sleeps for NICE_DELAY seconds before every request so that repeated calls
    stay under the openFDA per-minute rate limit for clients without an API key.

    Parameters
    ----------
    id : str
        Submission number, matched against the ``k_number`` field of the
        openFDA ``device/510k`` endpoint. (The name shadows the builtin ``id``;
        it is kept so existing keyword callers keep working.)

    Returns
    -------
    dict or float
        The first entry of the response's ``results`` list, or ``np.nan``
        (after emitting a warning) when the request fails, the body is not
        valid JSON, or the response carries no results.
    '''
    query = f'https://api.fda.gov/device/510k.json?search=k_number:"{id}"'
    time.sleep(NICE_DELAY)

    try:
        # A timeout keeps one stalled connection from hanging the whole loop.
        res = requests.get(query, timeout=30)
        # Parse the body exactly once; raises ValueError on a non-JSON body.
        payload = res.json()
    except (requests.RequestException, ValueError) as err:
        warnings.warn(f'Request for id: {id} failed ({err}). Returning empty (np.nan) result.')
        return np.nan

    # Error responses have no 'results' key; also guard against an empty list.
    results = payload.get('results') if isinstance(payload, dict) else None
    if not results:
        warnings.warn(f'Unable to obtain results for id: {id}. Returning empty (np.nan) result.')
        return np.nan

    return results[0]

def get_vals(rr):
    '''Extract ``(clearance_type, device_class)`` from one openFDA 510(k) record.

    Parameters
    ----------
    rr : dict or float or None
        A record as returned by ``get_openfda_data``: either a mapping with a
        ``clearance_type`` entry and a nested ``openfda`` mapping, or a missing
        marker (``np.nan`` / ``None``).

    Returns
    -------
    tuple
        ``(clearance_type, device_class)``. Any value that is absent from the
        record is returned as ``np.nan``; a missing record yields
        ``(np.nan, np.nan)``.
    '''
    # Test for "missing" by value, not identity: a NaN that is not the np.nan
    # singleton (or None) must not fall through to the subscripting below.
    if rr is None or (isinstance(rr, float) and np.isnan(rr)):
        return (np.nan, np.nan)

    # The nested 'openfda' block may be empty or absent for some records, so
    # look both levels up defensively instead of raising KeyError.
    openfda = rr.get('openfda') or {}
    return (rr.get('clearance_type', np.nan), openfda.get('device_class', np.nan))

In [4]:
# Query openFDA once per submission number. Every call sleeps NICE_DELAY
# seconds first, so this cell takes at least len(df) * NICE_DELAY seconds.
dat_list = [get_openfda_data(submission_number) for submission_number in df['Submission Number']]

# Transpose the per-record (clearance_type, device_class) pairs into two
# column-shaped tuples. zip(*[]) yields nothing, so fall back to two empty
# columns for an empty frame instead of raising IndexError on extra_res[0].
pairs = [get_vals(z) for z in dat_list]
extra_res = list(zip(*pairs)) if pairs else [(), ()]

df['clearance_type'] = extra_res[0]
df['device_class'] = extra_res[1]



In [5]:
# Persist the augmented data frame (without the index column).
# NOTE: the target is DAT_SRC, the same path the frame was loaded from,
# so the input file is overwritten.
df.to_csv(path_or_buf=DAT_SRC, index=False)