# OpenFDA API 

In [1]:
import pandas as pd
import numpy as np
import urllib.request, json
from pandas.io.json import json_normalize
import datetime
import math

## Step 1 

Specify API Key

In [2]:
def fetch_key(filename, dict_key):
    '''
    Fetches an API key stored in a json file
    
    Keyword Arguments
    :str filename: file location/filename for json file that houses the API key 
    :str dict_key: Key value within the dictionary that stores the API key
    
    Output
    The value stored under dict_key (the API key string)
    
    Raises
    OSError if the file cannot be opened
    json.JSONDecodeError if the file is not valid JSON
    KeyError if dict_key is not present in the file
    '''
    # Open in binary mode so the json module detects the encoding itself
    # (UTF-8 with or without a byte-order mark, UTF-16, UTF-32) instead of
    # depending on the platform's default text encoding.
    with open(filename, 'rb') as json_file:
        contents = json.load(json_file)
    # Hand back only the requested entry of the dictionary
    return contents[dict_key]

## Step 2 
Define Endpoint

In [8]:
def MAUDE_search_url(search_param, limit, skip, key):
    '''
    Builds the request URL for the openFDA medical device adverse event (MAUDE) endpoint.
    The searchable fields are listed at https://open.fda.gov/apis/device/event/searchable-fields/
    
    Keyword Arguments
    :str search_param: The search parameter for MAUDE; it is placed in the URL exactly as given
    :int limit: Number of events to return (the allowed range is set by the API - confirm in its docs)
    :int skip: Number of events to skip before returning the data (range also set by the API)
    :str key: API key
    
    Output
    URL (string)
    '''
    # Query string layout: api_key, then search, then the paging controls
    pieces = (
        'https://api.fda.gov/device/event.json?',
        'api_key=', key,
        '&search=', search_param,
        '&limit=', str(limit),
        '&skip=', str(skip),
    )
    return ''.join(pieces)

## Step 3
Get JSON data

In [9]:
def fetch_json(json_web):
    '''
    Requests a URL and parses the response body as JSON
    
    Keyword Arguments
    :str json_web: URL for the openFDA medical device adverse event API
    
    Output
    The parsed JSON content (JSON Dictionary)
    
    '''
    # The response object is file-like, so json can read from it directly;
    # the connection is closed when the with-block exits
    with urllib.request.urlopen(json_web) as response:
        return json.load(response)

## Step 4
Get meta data

In [10]:
def get_meta(data):
    '''
    Flattens the 'meta' section of an openFDA query result into a dataframe
    
    Keyword Arguments
    :dict data: JSON dictionary generated by the openFDA API; it must contain a 'meta' key
    
    Output
    pandas dataframe containing meta data
    '''
    meta_section = data['meta']
    # Nested dictionaries inside the meta section become dotted column names
    meta_frame = json_normalize(meta_section)
    return meta_frame

## Step 5
Normalize JSON data into a dataframe

In [35]:
def get_mdr_text(open_list):
    '''
    A function designed to be used in conjunction with the openFDA MAUDE database.  It targets the 
    mdr_text column and pulls the event description out of the nested json stored in that column.
    
    Keyword Arguments
    :list open_list: the mdr_text dictionaries belonging to one adverse event
    
    Output
    - the 'text' value of the first entry whose 'text_type_code' is 'Description of Event or Problem'
    - np.nan if that entry has no 'text' key, or if open_list cannot be iterated 
      (for example a NaN/None placeholder for a missing mdr_text value)
    - None if no entry is a 'Description of Event or Problem'
    '''
    # A missing cell is not iterable; report it as missing rather than
    # raising TypeError in the middle of a DataFrame.apply
    try:
        entries = iter(open_list)
    except TypeError:
        return np.nan
    # looping through the dictionaries in the list
    for mdr_text in entries:
        # .get() so an entry without a text type is skipped instead of raising KeyError
        if mdr_text.get('text_type_code') != 'Description of Event or Problem':
            continue
        # np.nan (lowercase): the np.NaN alias was removed in NumPy 2.0
        return mdr_text.get('text', np.nan)
    # No description entry was found
    return None

In [138]:
def maude_results(data):
    '''
    Flattens the 'results' of an openFDA medical device adverse event response into a single
    dataframe, including the nested device and openfda fields and an extracted event description.
    
    Keyword Arguments
    :dict data: JSON dictionary generated by the openFDA medical device adverse event API
    
    Output
    pandas dataframe containing the returned adverse events
    '''
    events = data['results']
    # Top-level fields of every event
    flat_events = json_normalize(data=events, errors='ignore')
    # One row per entry of each event's nested 'device' list
    devices = json_normalize(data=events, 
                             record_path='device', 
                             meta_prefix='base',
                             errors='ignore')
    # The 'openfda' column of the device frame is nested JSON as well
    device_openfda = json_normalize(devices['openfda'], errors='ignore')
    # Put the three frames side by side
    # NOTE(review): the frames are joined on their row index, which appears to assume
    # exactly one device per event - confirm
    combined = pd.concat([flat_events, devices, device_openfda], axis=1)
    # The nested source columns are no longer needed once expanded
    combined = combined.drop(['device', 'openfda'], axis=1)
    # Pull the event description out of the nested mdr_text column
    combined['mdr_text_extract'] = combined['mdr_text'].apply(get_mdr_text)
    # Keep only the first occurrence of each column name
    first_occurrence = ~combined.columns.duplicated()
    return combined.loc[:, first_occurrence]

In [139]:
# Request the first 100 events received in January 2014 and flatten them
url = MAUDE_search_url(
    search_param='date_received:[20140101+TO+20140131]',
    limit=100,
    skip=0,
    key=fetch_key('openfda_key.json', 'api_key'),
)
data = fetch_json(url)
meta = get_meta(data)
test1 = maude_results(data)

test1.shape

(100, 105)

In [140]:
# Request the first 75 events received in January 2015 and flatten them
url2 = MAUDE_search_url(
    search_param='date_received:[20150101+TO+20150131]',
    limit=75,
    skip=0,
    key=fetch_key('openfda_key.json', 'api_key'),
)
data2 = fetch_json(url2)
meta2 = get_meta(data2)
test2 = maude_results(data2)

test2.shape

(75, 106)

In [145]:
# Request 75 events received in January 2016, skipping the first 1000 matches
url3 = MAUDE_search_url(
    search_param='date_received:[20160101+TO+20160131]',
    limit=75,
    skip=1000,
    key=fetch_key('openfda_key.json', 'api_key'),
)
data3 = fetch_json(url3)
meta3 = get_meta(data3)
test3 = maude_results(data3)

test3.shape

(75, 105)

## Step 6
A function to fetch multiple sets of data from the openFDA database and concatenate all of the data into a single dataframe

In [152]:
# Stack the three result sets row-wise; join='inner' keeps only the columns
# that all three dataframes share
pd.concat(
    [test1, test2, test3],
    axis=0,
    join='inner',
    sort=False,
    ignore_index=True,
).shape

(250, 104)

Note: the concat function does not work with duplicate column names, so duplicated columns have to be removed from each dataframe before concatenating.