## FDA Device Database Scraper
This notebook pulls the data on 343 device approvals for AI/ML devices from the FDA website that was released on 09/22/2021: 
https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
FDA_URL = "https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices"
OUTPUT_FILE  = 'fda_approved_devices.csv'

In [3]:
# Get web page and parse
page = requests.get(FDA_URL)
soup = BeautifulSoup(page.content, "html.parser")
results = soup.find_all(role="row")
col_headers = results[0].text.split('\n')[:-1]
entries = [pd.DataFrame(rr.text.split('\n')[:-1]).T for rr in results[1:]]
entries_df = pd.concat(entries)

In [4]:
# List all entries that are messy with some extra columns - just one
display(entries_df[entries_df[entries_df.columns[6]].notna()])

Unnamed: 0,0,1,2,3,4,5,6,7
0,09/16/2011,K102778,,CellaVision® DM1200 with the body fluid applic...,,CellaVision AB,Hematology,JOY


In [5]:
# Exclude messy entry and clean up df
entries_df_clean = entries_df[entries_df[entries_df.columns[6]].isna()].iloc[:,0:6]
# Add back the cleaned up messy entry
messy_df = entries_df[entries_df[entries_df.columns[6]].notna()]
messy_df = messy_df.drop(messy_df.columns[[2,4]], axis = 1)
messy_df.columns = range(6)
entries_df_clean = entries_df_clean.append(messy_df)
# Fix column names
entries_df_clean.columns = col_headers
# Add link to URL for additional FDA details
base_fda_info_url = "https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm?ID="
entries_df_clean['fda_url'] = [base_fda_info_url + id for id in entries_df_clean['Submission Number']]

In [6]:
# Show some data
display(entries_df_clean)

Unnamed: 0,Date of Final Decision,Submission Number,Device,Company,Panel (Lead),Primary Product Code,fda_url
0,06/17/2021,K203514,Precise Position,"Philips Healthcare (Suzhou) Co., Ltd.",Radiology,JAK,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,06/16/2021,K202718,Qmenta Care Platform Family,"Mint Labs, Inc., D/B/A. QMENTA",Radiology,LLZ,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,06/11/2021,K210484,"LINQ II Insertable Cardiac Monitor, Zelda AI E...","Medtronic, Inc.",Cardiovascular,MXD,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,06/10/2021,K203629,IDx-DR,Digital Diagnostics Inc.,Ophthalmic,PIB,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,06/02/2021,DEN200069,Cognoa Asd Diagnosis Aid,"Cognoa, Inc.",Neurology,QPF,https://www.accessdata.fda.gov/scripts/cdrh/cf...
...,...,...,...,...,...,...,...
0,05/15/2008,K080762,"IB Neuro, Version 1.0","Imaging Biometrics, LLC",Radiology,LNH,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,02/10/2004,K033840,CellaVision DM96 Automatic Hematology Analyzer,Cella Vision AB,Hematology,JOY,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,03/15/2001,K003301,DiffMaster Octavia Automatic Hematology Analyzer,CellaVision AB,Hematology,JOY,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,03/11/1997,K955841,Compumedics Sleep Monitoring System,Computmedics Sleep Pty. Ltd.,Anesthesiology,MNR,https://www.accessdata.fda.gov/scripts/cdrh/cf...


In [7]:
# Count product categories
entries_df_clean['Panel (Lead)'].value_counts()
# NB don't forget the one from hematology excluded above

Radiology                      241
Cardiovascular                  41
Hematology                      13
Neurology                       12
Ophthalmic                       6
Clinical Chemistry               5
Microbiology                     5
General And Plastic Surgery      5
Gastroenterology-Urology         4
Anesthesiology                   4
General Hospital                 3
Pathology                        1
Orthopedic                       1
Dental                           1
Obstetrics And Gynecology        1
Name: Panel (Lead), dtype: int64

In [8]:
# Count companies
entries_df_clean['Company'].value_counts()

Siemens Medical Solutions USA, Inc.                                13
Canon Medical Systems Corporation                                   9
Aidoc Medical, Ltd.                                                 8
Zebra Medical Vision Ltd.                                           7
GE Medical Systems Ultrasound and Primary Care Diagnostics, LLC     7
                                                                   ..
Icometrix                                                           1
Behold.AI Technologies Limited                                      1
EXINI Diagnostics AB                                                1
Cosmo Artificial Intelligence - AI, Ltd.                            1
Circle Cardiovascular Imaging Inc                                   1
Name: Company, Length: 249, dtype: int64

In [9]:
# Save to file
entries_df_clean.to_csv(OUTPUT_FILE, index = False)