# Exploring the MAUDE Database

In [160]:
import pandas as pd 
import numpy as np 
import urllib.request, json
from pandas.io.json import json_normalize


In [161]:
# Request up to 100 device adverse-event reports received during 2017 from the openFDA API
maude_query = "https://api.fda.gov/device/event.json?search=date_received:[20170101+TO+20171231]&limit=100"
with urllib.request.urlopen(maude_query) as url:
    # `url` is the open HTTP response; parse its JSON body
    data = json.load(url)

# Flatten the records under the "results" key of the JSON into a dataframe
# to get a first look at the overall structure of the data
maude_general = json_normalize(data['results'])

maude_general.head()

Unnamed: 0,adverse_event_flag,date_facility_aware,date_manufacturer_received,date_of_event,date_received,date_report,device,device_date_of_manufacturer,distributor_address_1,distributor_address_2,...,removal_correction_number,report_number,report_source_code,report_to_fda,report_to_manufacturer,reporter_occupation_code,reprocessed_and_reused_flag,single_use_flag,source_type,type_of_report
0,Y,,20110503,20100101.0,20170112,20110421.0,[],,,,...,,1226230-2011-00002,Manufacturer report,N,,PHYSICIAN,I,Y,[HEALTH PROFESSIONAL],"[Followup, Followup, Followup, Followup, Follo..."
1,N,,20150810,,20170309,,"[{'manufacturer_d_address_1': '', 'manufacture...",20030314.0,,,...,,2210968-2015-02964,Manufacturer report,N,,,I,*,[],[Followup]
2,N,,20170802,20161205.0,20170101,20161229.0,[{'manufacturer_d_address_1': '900 WALNUT RIDG...,20150721.0,,,...,,2183926-2016-00829,Manufacturer report,N,,OTHER,N,N,[USER FACILITY],[Initial submission]
3,N,,20170221,20161209.0,20170101,20161209.0,[{'manufacturer_d_address_1': '900 WALNUT RIDG...,20150721.0,,,...,,2183926-2016-00828,Manufacturer report,N,,OTHER,N,N,[USER FACILITY],[Initial submission]
4,N,,20170928,20161208.0,20170101,20161208.0,[{'manufacturer_d_address_1': '900 WALNUT RIDG...,20150330.0,,,...,,2183926-2016-00830,Manufacturer report,N,,OTHER,N,N,[USER FACILITY],[Initial submission]


In [162]:
# Most of the information is housed within the "device" section of "results",
# so each entry of a report's "device" list is expanded into its own row.
# Report-level fields from "results" that are appended to the end of every device row:
report_fields = ['adverse_event_flag', 'date_manufacturer_received', 'date_of_event',
                 'event_key', 'event_location', 'event_type',
                 'health_professional', 'manufacturer_city', 'manufacturer_country']
maude_device = json_normalize(data['results'],
                              record_path='device',
                              meta=report_fields,
                              # do not raise when a report lacks one of the meta fields
                              errors='ignore')

In [163]:
# Preview the first five device-level rows
maude_device.head(n=5)

Unnamed: 0,brand_name,catalog_number,date_received,date_removed_flag,date_returned_to_manufacturer,device_age_text,device_availability,device_evaluated_by_manufacturer,device_event_key,device_operator,...,other_id_number,adverse_event_flag,date_manufacturer_received,date_of_event,event_key,event_location,event_type,health_professional,manufacturer_city,manufacturer_country
0,MESH,,20170309,,,DA,Yes,N,,HEALTH PROFESSIONAL,...,,N,20150810,,,I,Injury,,,
1,MERGE HEMODYNAMICS,,20170101,,,DA,Yes,R,,HEALTH PROFESSIONAL,...,,N,20170802,20161205.0,,I,Malfunction,N,,
2,MERGE HEMODYNAMICS,,20170101,,20170220.0,DA,Device was returned to manufacturer,Y,,HEALTH PROFESSIONAL,...,,N,20170221,20161209.0,,I,Malfunction,N,,
3,MERGE UNITY PACS,,20170101,,,DA,Yes,Y,,HEALTH PROFESSIONAL,...,,N,20170928,20161208.0,,I,Malfunction,N,,
4,OT VERIO FLEX METER,,20170101,,,,No,R,,LAY USER/PATIENT,...,1-3BBSA4F,N,20161224,,,I,Malfunction,N,,


In [164]:
# A function to extract the desired data from the openfda column
def get_openfda(open_dict):
    """Extract the device name, class and medical specialty from an ``openfda`` entry.

    Parameters
    ----------
    open_dict : dict
        The value of the ``openfda`` column for one device row. Anything that
        is not a dict (e.g. ``None`` or ``NaN`` for a row without the column)
        is treated as an empty entry.

    Returns
    -------
    list
        ``[device_name, device_class, medical_specialty_description]``.
        A field that is absent from ``open_dict`` is returned as ``None``
        instead of raising ``KeyError``, so one incomplete record does not
        abort a whole ``Series.apply`` pass.
    """
    # Rows without usable openfda data are handled like an empty dictionary
    if not isinstance(open_dict, dict):
        open_dict = {}
    wanted_keys = ('device_name', 'device_class', 'medical_specialty_description')
    # .get() yields None for any field the entry does not carry
    return [open_dict.get(key) for key in wanted_keys]

In [165]:
# Creating a series holding, for every device row, the list returned by get_openfda
openfda = maude_device['openfda'].apply(get_openfda)

# Converting the lists in the series to a dataframe with one row per device.
# The plain DataFrame constructor is used because DataFrame.from_items was
# deprecated in pandas 0.23 and removed in pandas 1.0. Reusing openfda.index
# keeps the rows aligned with maude_device when the two frames are combined.
openfda_df = pd.DataFrame(list(openfda), index=openfda.index,
                          columns=['general_device_name', 'device_class', 'medical_specialty'])

In [166]:
# Making sure the maude_device dataframe and the openfda_df dataframe are the same length before merging
print(len(maude_device), len(openfda_df))
# Enforce the check rather than relying on a visual comparison of the printed numbers
assert len(maude_device) == len(openfda_df), "maude_device and openfda_df differ in length"

openfda_df.head()

99 99


Unnamed: 0,general_device_name,device_class,medical_specialty
0,"Mesh, Surgical, Polymeric",2,"General, Plastic Surgery"
1,"Computer, Diagnostic, Programmable",2,Cardiovascular
2,"Computer, Diagnostic, Programmable",2,Cardiovascular
3,"System, Image Processing, Radiological",2,Radiology
4,"System, Test, Blood Glucose, Over The Counter",2,Clinical Chemistry


In [169]:
# Combining the two dataframes side by side, then removing the initial openfda
# column, whose fields of interest now live in openfda_df's columns
maude_tot = pd.concat([maude_device, openfda_df], axis=1).drop('openfda', axis=1)
maude_tot.head()

Unnamed: 0,brand_name,catalog_number,date_received,date_removed_flag,date_returned_to_manufacturer,device_age_text,device_availability,device_evaluated_by_manufacturer,device_event_key,device_operator,...,date_of_event,event_key,event_location,event_type,health_professional,manufacturer_city,manufacturer_country,general_device_name,device_class,medical_specialty
0,MESH,,20170309,,,DA,Yes,N,,HEALTH PROFESSIONAL,...,,,I,Injury,,,,"Mesh, Surgical, Polymeric",2,"General, Plastic Surgery"
1,MERGE HEMODYNAMICS,,20170101,,,DA,Yes,R,,HEALTH PROFESSIONAL,...,20161205.0,,I,Malfunction,N,,,"Computer, Diagnostic, Programmable",2,Cardiovascular
2,MERGE HEMODYNAMICS,,20170101,,20170220.0,DA,Device was returned to manufacturer,Y,,HEALTH PROFESSIONAL,...,20161209.0,,I,Malfunction,N,,,"Computer, Diagnostic, Programmable",2,Cardiovascular
3,MERGE UNITY PACS,,20170101,,,DA,Yes,Y,,HEALTH PROFESSIONAL,...,20161208.0,,I,Malfunction,N,,,"System, Image Processing, Radiological",2,Radiology
4,OT VERIO FLEX METER,,20170101,,,,No,R,,LAY USER/PATIENT,...,,,I,Malfunction,N,,,"System, Test, Blood Glucose, Over The Counter",2,Clinical Chemistry


In [170]:
# List every column of the combined dataframe
maude_tot.columns

Index(['brand_name', 'catalog_number', 'date_received', 'date_removed_flag',
       'date_returned_to_manufacturer', 'device_age_text',
       'device_availability', 'device_evaluated_by_manufacturer',
       'device_event_key', 'device_operator', 'device_report_product_code',
       'device_sequence_number', 'expiration_date_of_device', 'generic_name',
       'implant_flag', 'lot_number', 'manufacturer_d_address_1',
       'manufacturer_d_address_2', 'manufacturer_d_city',
       'manufacturer_d_country', 'manufacturer_d_name',
       'manufacturer_d_postal_code', 'manufacturer_d_state',
       'manufacturer_d_zip_code', 'manufacturer_d_zip_code_ext',
       'model_number', 'other_id_number', 'adverse_event_flag',
       'date_manufacturer_received', 'date_of_event', 'event_key',
       'event_location', 'event_type', 'health_professional',
       'manufacturer_city', 'manufacturer_country', 'general_device_name',
       'device_class', 'medical_specialty'],
      dtype='object')