# Project Data Downloader

This notebook will fetch EPA data ZIP files and extract them into an `EPA_Data/` subdirectory.

**Note:** Some of the archives are very large (the NPDES effluent download alone is about 1.6 GB), so downloading and extracting them may take a while.


In [3]:
# 1. Imports and setup

import os
import tempfile
import zipfile
from io import BytesIO

import requests

# Every downloaded EPA archive is unpacked beneath this folder
DATA_DIR = "EPA_Data"

# Create the folder up front; an already-existing folder is not an error
os.makedirs(name=DATA_DIR, exist_ok=True)
print(f"Using data directory: {DATA_DIR}")


Using data directory: EPA_Data


In [10]:
# 2. List of EPA download URLs
#
# Every entry ends with a trailing comma so that adding or re-enabling a line
# can never silently concatenate two adjacent string literals into one URL.
#
# The ORD_SAB_Model shapefile archive (EPA_CWS_V1.zip) is intentionally not
# listed here: its raw.githubusercontent.com URL did not yield a valid ZIP
# ("File is not a zip file"), so it is fetched separately via the GitHub API.

urls = [
    "https://echo.epa.gov/files/echodownloads/SDWA_latest_downloads.zip",  # drinking water, PWSIDs
    "https://echo.epa.gov/files/echodownloads/npdes_downloads.zip",  # npdes part 1
    "https://echo.epa.gov/files/echodownloads/npdes_eff_downloads.zip",  # npdes part 2 (this one's a doozy at 1.6gb!)
]


In [5]:
# 3. Download & extract function

def download_and_extract_zip(url: str, extract_to: str = DATA_DIR, timeout: float = 60.0):
    """Download a ZIP archive from ``url`` and unpack it into ``extract_to``.

    The response body is streamed to a temporary file in chunks instead of
    being held in memory, so multi-gigabyte archives do not need to fit in RAM.

    Args:
        url: Location of the ZIP archive to fetch.
        extract_to: Directory the archive members are extracted into
            (defaults to ``DATA_DIR``).
        timeout: Seconds to wait for the connection and for each read from
            the socket; passed straight to ``requests.get``. It is not a
            limit on the total download time.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure, including
            a timeout.
        zipfile.BadZipFile: The downloaded payload is not a valid ZIP file.
    """
    print(f"Downloading: {url}")

    # The temp file is removed automatically when the `with` block exits,
    # whether extraction succeeds or fails.
    with tempfile.TemporaryFile() as archive:
        # Using the response as a context manager releases the connection
        # even if the download is interrupted part-way through.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                archive.write(chunk)

        # Rewind so ZipFile reads the archive from the beginning
        archive.seek(0)
        with zipfile.ZipFile(archive) as z:
            print(f" → extracting {len(z.namelist())} files into '{extract_to}/'")
            z.extractall(extract_to)
    print(" ✔ Done\n")


In [11]:
# 4. Iterate through all URLs

# Best effort: a failure on one archive is reported and the loop moves on
# to the next URL instead of aborting the whole run.
for link in urls:
    try:
        download_and_extract_zip(link)
    except Exception as err:
        print(f"✖ Failed to process {link}: {err}")


Downloading: https://raw.githubusercontent.com/USEPA/ORD_SAB_Model/main/Output_Data/EPA_CWS_V1.zip
✖ Failed to process https://raw.githubusercontent.com/USEPA/ORD_SAB_Model/main/Output_Data/EPA_CWS_V1.zip: File is not a zip file


In [14]:

# 5. Get shape file from PWSID boundary mapping github project
#
# The archive is resolved through the GitHub contents API rather than a raw
# URL: the API response carries a `download_url` field pointing at the file.
# NOTE(review): the raw URL presumably fails because the file is stored via
# Git LFS — confirm.
api_url = (
    "https://api.github.com/repos/USEPA/ORD_SAB_Model/contents/Output_Data/EPA_CWS_V1.zip?ref=main"
)

# 5a. Look up the download location. Fail loudly on an HTTP error (e.g. rate
# limiting or a moved file) instead of hitting a KeyError on the JSON below.
meta_resp = requests.get(api_url, timeout=60)
meta_resp.raise_for_status()
meta = meta_resp.json()
download_url = meta.get("download_url")
if not download_url:
    raise RuntimeError(f"GitHub API returned no download_url for {api_url}")

# 5b. Download & unpack with the same helper used for the EPA archives
download_and_extract_zip(download_url, DATA_DIR)
