## FDA Device Database Scraper
This notebook pulls the data on 343 device approvals (initially this many, though more have been added, now 521) for AI/ML devices from the FDA website that was released on 09/22/2021: 
https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
FDA_URL = "https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices"
OUTPUT_FILE  = 'fda_approved_devices.csv'

In [3]:
# Get web page and parse
page = requests.get(FDA_URL)
soup = BeautifulSoup(page.content, "html.parser")
results = soup.find_all(role="row")
col_headers = results[0].text.split('\n')[:-1]
entries = [pd.DataFrame(rr.text.split('\n')[:-1]).T for rr in results[1:]]
entries_df = pd.concat(entries)

In [4]:
# Fix column names
entries_df.columns = col_headers
# Drop first empty column
entries_df = entries_df.drop(entries_df.columns[0], axis = 1)
# Add link to URL for additional FDA details
base_fda_info_url = "https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm?ID="
entries_df['fda_url'] = [base_fda_info_url + id for id in entries_df['Submission Number']]

In [5]:
# Show some data
display(entries_df)

Unnamed: 0,Date of Final Decision,Submission Number,Device,Company,Panel (Lead),Primary Product Code,fda_url
0,07/29/2022,K213760,ABMD Software,HeartLung Corporation,Radiology,KGI,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,07/29/2022,K220961,Deep Learning Image Reconstruction,GE Healthcare Japan Corporation,Radiology,JAK,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,07/28/2022,K213998,cvi42 Auto Imaging Software Application,Circle Cardiovascular Imaging Inc,Radiology,QIH,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,07/28/2022,K221923,Swoop Portable MR Imaging System,"Hyperfine, Inc.",Radiology,LNH,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,07/27/2022,K210822,DeepRhythmAI,Medicalgorithmics S.A.,Cardiovascular,DQK,https://www.accessdata.fda.gov/scripts/cdrh/cf...
...,...,...,...,...,...,...,...
0,03/12/2021,K202284,Oncospace,"Oncospace, Inc.",Radiology,MUJ,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,02/04/2021,K203582,QUIBIM Precision Prostate (qp-Prostate),QUIBIM S.L.,Radiology,LLZ,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,01/12/2021,K203280,Hepatica V1 (Hepatica V1.0.0),Perspectum Ltd.,Radiology,LNH,https://www.accessdata.fda.gov/scripts/cdrh/cf...
0,01/11/2021,P200003,Imagio Breast Imaging System,"Seno Medical Instruments, Inc.",Radiology,QNK,https://www.accessdata.fda.gov/scripts/cdrh/cf...


In [6]:
# Count product categories
entries_df['Panel (Lead)'].value_counts()

Radiology                      392
Cardiovascular                  57
Hematology                      15
Neurology                       14
Ophthalmic                       7
Clinical Chemistry               6
General And Plastic Surgery      5
Microbiology                     5
Gastroenterology-Urology         4
Anesthesiology                   4
Pathology                        4
General Hospital                 3
Gastroenterology & Urology       2
Obstetrics And Gynecology        1
Orthopedic                       1
Dental                           1
Name: Panel (Lead), dtype: int64

In [7]:
# Count companies
entries_df['Company'].value_counts()

Siemens Medical Solutions USA, Inc.    21
Canon Medical Systems Corporation      15
Aidoc Medical, Ltd.                    13
Zebra Medical Vision Ltd.               9
GE Medical Systems SCS                  7
                                       ..
Behold.AI Technologies Limited          1
Xiamen Manteia Technology Ltd.          1
Siemens Medical Solution USA, Inc.      1
Varian Medical Systems, Inc.            1
Seno Medical Instruments, Inc.          1
Name: Company, Length: 342, dtype: int64

In [8]:
# Save to file
entries_df.to_csv(OUTPUT_FILE, index = False)