In [None]:
import os
from dotenv import load_dotenv
from sec_api import QueryApi, RenderApi
# Load environment variables from .env file
load_dotenv()

# Retrieve the API key from the environment
QUERY_API_KEY = os.getenv("QUERY_API_KEY")
RENDER_API_KEY = os.getenv("RENDER_API_KEY")
# print(f"QUERY_API_KEY: {QUERY_API_KEY}")

# Initialize API clients
queryApi = QueryApi(api_key=QUERY_API_KEY)
renderApi = RenderApi(api_key=RENDER_API_KEY)

query = {
  "query": { "query_string": { 
      "query": "formType:\"10-K\" AND ticker:AAPL", # only 10-Ks
  }},
  "from": "0", # start returning matches from position null, i.e. the first matching filing 
  "size": "1"  # return just one filing
}

response = queryApi.get_filings(query)


In [3]:
import json 
print(json.dumps(response, indent=2))

{
  "total": {
    "value": 96,
    "relation": "eq"
  },
  "query": {
    "from": 0,
    "size": 10
  },
  "filings": [
    {
      "ticker": "AAPL",
      "formType": "10-Q",
      "accessionNo": "0000320193-25-000008",
      "cik": "320193",
      "companyNameLong": "Apple Inc. (Filer)",
      "companyName": "Apple Inc.",
      "linkToFilingDetails": "https://www.sec.gov/Archives/edgar/data/320193/000032019325000008/aapl-20241228.htm",
      "description": "Form 10-Q - Quarterly report [Sections 13 or 15(d)]",
      "linkToTxt": "https://www.sec.gov/Archives/edgar/data/320193/000032019325000008/0000320193-25-000008.txt",
      "filedAt": "2025-01-31T06:01:27-05:00",
      "documentFormatFiles": [
        {
          "sequence": "1",
          "size": "732589",
          "documentUrl": "https://www.sec.gov/ix?doc=/Archives/edgar/data/320193/000032019325000008/aapl-20241228.htm",
          "description": "10-Q",
          "type": "10-Q"
        },
        {
          "sequence": "2",


In [None]:
# Base query template
base_query = {
  "query": { 
      "query_string": { 
          "query": "PLACEHOLDER",  # Placeholder to be replaced by the specific query
          "time_zone": "America/New_York"
      } 
  },
  "from": "0",
  "size": "200",
  # sort returned filings by the filedAt key/value
  "sort": [{"filedAt": {"order": "desc"}}]
}

# Open the file we use to store the filing URLs
log_file = open("filing_urls_AAPL.txt", "a")

# Fetch filings for years 2022 and 2021
for year in range(2022, 2025, 1):
    print(f"Starting download for year {year}")
    
    for month in range(1, 13, 1):  # Iterate over each month
        # Construct search query for 10-K filings within the given year-month for AAPL
        universe_query = (
            "formType:(\"10-K\") AND " +
            "filedAt:[{year}-{month:02d}-01 TO {year}-{month:02d}-31] AND " +
            "ticker:AAPL"
        ).format(year=year, month=month)

        # Debugging: Print the query being generated
        print(f"Generated query: {universe_query}")

        # Update query template with specific query string
        base_query["query"]["query_string"]["query"] = universe_query

        # Pagination: Fetch results in batches of 200
        for from_batch in range(0, 400, 200):  # Update pagination start index 
            base_query["from"] = from_batch

            # Debugging: Print the page we're fetching
            print(f"Fetching batch starting at index {from_batch}...")

            try:
                response = queryApi.get_filings(base_query)
                
                # Debugging: Print the raw response
                print(f"Response received: {response}")

                # Stop if no more filings are found
                if len(response["filings"]) == 0:
                    print("No more filings found.")
                    break

                # Extract and store filing URLs
                urls_list = [x["linkToFilingDetails"] for x in response["filings"]]

                # Debugging: Print the number of URLs found in this batch
                print(f"Found {len(urls_list)} URLs in this batch")

                # Transform list of URLs into one string by joining all list elements
                # and add a new-line character between each element.
                urls_string = "\n".join(urls_list) + "\n"
                
                # Write URLs to log file
                log_file.write(urls_string)

            except Exception as e:
                print(f"Error during API request: {e}")
                break

# Close the log file
log_file.close()
print("All AAPL URLs downloaded")


Starting download for year 2022
Generated query: formType:("10-K") AND filedAt:[2022-01-01 TO 2022-01-31] AND ticker:AAPL
Fetching batch starting at index 0...
Response received: {'total': {'value': 0, 'relation': 'eq'}, 'query': {'from': 0, 'size': 200}, 'filings': []}
No more filings found.
Generated query: formType:("10-K") AND filedAt:[2022-02-01 TO 2022-02-31] AND ticker:AAPL
Fetching batch starting at index 0...
Response received: {'total': {'value': 0, 'relation': 'eq'}, 'query': {'from': 0, 'size': 200}, 'filings': []}
No more filings found.
Generated query: formType:("10-K") AND filedAt:[2022-03-01 TO 2022-03-31] AND ticker:AAPL
Fetching batch starting at index 0...
Response received: {'total': {'value': 0, 'relation': 'eq'}, 'query': {'from': 0, 'size': 200}, 'filings': []}
No more filings found.
Generated query: formType:("10-K") AND filedAt:[2022-04-01 TO 2022-04-31] AND ticker:AAPL
Fetching batch starting at index 0...
Response received: {'total': {'value': 0, 'relation': 

In [13]:
import multiprocessing

# Download 10-k Filings from Stored URLs
def download_filing(url):
  """
  Downloads a 10-K filing from SEC-API and saves it to the 'filings' folder.
  """
  filing = renderApi.get_filing(url)
  file_name = url.split("/")[-2] + "-" + url.split("/")[-1] # Generate filename
  download_to = "./filings/" + file_name
  
  with open(download_to, "w") as f:
    f.write(filing) # Save filing content to file


# load URLs from log file
def load_urls():
  """
  Loads filing URLs from the log file.
  Returns a list of URLs.
  """
  log_file = open("filing_urls_AAPL.txt", "r")
  urls = log_file.read().split("\n") # convert long string of URLs into a list 
  log_file.close()
  return urls

def download_all_filings():
  """
  Downloads all filings in parallel using multiprocessing.
  """
  print("Start downloading all filings")

  # Create download folder if it doesn't exist
  download_folder = "./filings" 
  if not os.path.isdir(download_folder):
    os.makedirs(download_folder)
    
# Load filing URLs
  urls = load_urls()[1:40] # Downloading the first 40 for testing
  print("{length} filing URLs loaded".format(length=len(urls)))

  # Number of parallel processes for downloading
  number_of_processes = 20

  with multiprocessing.Pool(number_of_processes) as pool:
    pool.map(download_filing, urls)
  
  print("All filings downloaded")

In [None]:
# Execute download process
download_all_filings()