In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Input parameters
start_year = 2015  # Adjust for start year
end_year = 2025    # Adjust for end year
start_month = 1    # Adjust for start month (1 = January)
end_month = 1     # Adjust for end month (12 = December)
company_name = "Pfizer"  # Enter the company name to search for

# Function to fetch data for a given month and year
def fetch_approvals(month, year):
    url = f"https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=reportsSearch.process&rptName=1&reportSelectMonth={month}&reportSelectYear={year}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.accessdata.fda.gov/",
        "Connection": "keep-alive",
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        table = soup.find("table")
        if table:
            rows = table.find_all("tr")
            approvals = []
            for row in rows[1:]:  # Skip the header row
                cols = row.find_all("td")
                if cols:
                    approvals.append({
                        "Approval Date": cols[0].text.strip(),
                        "Drug Name": cols[1].text.strip(),
                        "Submission": cols[2].text.strip(),
                        "Active Ingredients": cols[3].text.strip(),
                        "Sponsor": cols[4].text.strip(),
                    })
            return approvals
    return []

# Loop through all months and years in the specified range
all_approvals = []
for year in range(start_year, end_year + 1):  # Loop through years
    for month in range(start_month, end_month + 1):  # Loop through months
        print(f"Fetching data for {month}/{year}...")
        month_approvals = fetch_approvals(month, year)
        all_approvals.extend(month_approvals)

# Convert to a DataFrame
df = pd.DataFrame(all_approvals)

# Filter for the specified company
filtered_approvals = df[df["Sponsor"].str.contains(company_name, case=False, na=False)]

# Display the results
if not filtered_approvals.empty:
    print(f"Approvals for {company_name}:")
    print(filtered_approvals)
else:
    print(f"No approvals found for {company_name} in the specified time range.")

# Save all approvals to a CSV
#df.to_csv(f"fda_approvals_{start_year}_to_{end_year}.csv", index=False)
#print(f"All approvals saved to fda_approvals_{company_name}_{start_year}_to_{end_year}.csv")

# Save company-specific approvals to a separate CSV (if found)
if not filtered_approvals.empty:
    filtered_approvals.to_csv(f"{company_name}_approvals_{start_year}_to_{end_year}.csv", index=False)
    print(f"{company_name} data saved to {company_name}_approvals_{start_year}_to_{end_year}.csv")


Fetching data for 1/2015...
Fetching data for 1/2016...
Fetching data for 1/2017...
Fetching data for 1/2018...
Fetching data for 1/2019...
Fetching data for 1/2020...
Fetching data for 1/2021...
Fetching data for 1/2022...
Fetching data for 1/2023...
Fetching data for 1/2024...
Fetching data for 1/2025...
Approvals for Pfizer:
     Approval Date                                          Drug Name  \
110     01/09/2015                             PROCARDIANDA   #018482   
111     01/09/2015                          PROCARDIA XLNDA   #019684   
139     01/12/2015                     CAVERJECT IMPULSENDA   #021212   
145     01/12/2015                             CLEOCIN TNDA   #050615   
153     01/13/2015                               LONITENNDA   #018154   
343     01/23/2015                          DEPO-PROVERANDA   #020246   
358     01/23/2015                 DEPO-SUBQ PROVERA 104NDA   #021583   
408     01/27/2015                               FRAGMINNDA   #020287   
435     01/30