In [1]:
import requests
import os
import time


In [2]:
# Company to look up, written as the 10-character zero-padded CIK string that
# the submissions URL below embeds verbatim.
CIK = "0000863436"
# Placeholder identity: replace with your own name and contact e-mail before
# running. SEC's fair-access policy asks automated clients to identify
# themselves via the User-Agent header.
headers = {
    "User-Agent": "YourName your_email@example.com"
}

url = f"https://data.sec.gov/submissions/CIK{CIK}.json"
# requests has no default timeout; without one a stalled connection would
# block this cell indefinitely.
resp = requests.get(url, headers=headers, timeout=30)
# Fail here on 4xx/5xx instead of letting an error page reach resp.json()
# and surface as a confusing decode error or a KeyError in a later cell.
resp.raise_for_status()
data = resp.json()


In [3]:
filings = data["filings"]["recent"]

# The code below treats these four entries as parallel lists: position i in
# each one describes the same filing.
forms, accession_numbers, primary_docs, report_dates = (
    filings[column]
    for column in ("form", "accessionNumber", "primaryDocument", "reportDate")
)

# Keep only quarterly (10-Q) and annual (10-K) reports. Dashes are stripped
# from the accession number because the download cell places it in the URL
# path as-is.
targets = [
    {
        "form": form_type,
        "accession": accession.replace("-", ""),
        "doc": primary_doc,
        "date": report_date,
    }
    for form_type, accession, primary_doc, report_date in zip(
        forms, accession_numbers, primary_docs, report_dates
    )
    if form_type in ("10-Q", "10-K")
]

len(targets)  # expect many years' worth of filings


65

In [11]:
# Folder that collects the downloaded filings. exist_ok=True makes the call a
# no-op when the folder is already there, so this cell can be re-run freely.
BASE_DIR = "benchmark_filings"
os.makedirs(name=BASE_DIR, exist_ok=True)


In [17]:
# Download the primary document of every selected filing into BASE_DIR.
# Files already on disk are skipped, so the cell can be re-run cheaply after
# an interruption. Any HTTP error aborts the loop via raise_for_status().
for t in targets:
    acc = t["accession"]
    doc = t["doc"]
    form = t["form"]
    date = t["date"]

    # With an empty document name the URL below would end at the accession
    # folder instead of a file, and the local name would end in "_".
    if not doc:
        print("SKIP (no primary document):", form, date, acc)
        continue

    # int(CIK) drops the leading zeros of the padded CIK string.
    filing_url = (
        f"https://www.sec.gov/Archives/edgar/data/"
        f"{int(CIK)}/{acc}/{doc}"
    )

    # basename() keeps the local name flat even if `doc` carries a sub-path,
    # which would otherwise make open() fail on a missing subdirectory.
    filename = f"{form}_{date}_{os.path.basename(doc)}"
    filepath = os.path.join(BASE_DIR, filename)

    if os.path.exists(filepath):
        print("SKIP (exists):", filename)
        continue
    print("Downloading:", filename)

    # requests has no default timeout; set one so a stalled connection
    # cannot hang the whole loop.
    r = requests.get(filing_url, headers=headers, timeout=30)
    r.raise_for_status()

    # Write to a scratch file and rename it into place, so an interrupted
    # write never leaves a truncated file that the exists-check above would
    # skip on the next run. The scratch name deliberately lacks the
    # "<form>_<date>" prefix so the later filename scan ignores it.
    tmp_path = os.path.join(BASE_DIR, ".download.part")
    with open(tmp_path, "wb") as f:
        f.write(r.content)
    os.replace(tmp_path, filepath)

    time.sleep(0.2)  # throttle: SEC requires rate limiting


SKIP (exists): 10-Q_2025-09-30_bhe-20250930.htm
SKIP (exists): 10-Q_2025-06-30_bhe-20250630.htm
SKIP (exists): 10-Q_2025-03-31_bhe-20250331.htm
SKIP (exists): 10-K_2024-12-31_bhe-20241231.htm
SKIP (exists): 10-Q_2024-09-30_bhe-20240930.htm
SKIP (exists): 10-Q_2024-06-30_bhe-20240630.htm
SKIP (exists): 10-Q_2024-03-31_bhe-20240331.htm
SKIP (exists): 10-K_2023-12-31_bhe-20231231.htm
SKIP (exists): 10-Q_2023-09-30_bhe-20230930.htm
SKIP (exists): 10-Q_2023-06-30_bhe-20230630.htm
SKIP (exists): 10-Q_2023-03-31_bhe-20230331.htm
SKIP (exists): 10-K_2022-12-31_bhe-20221231.htm
SKIP (exists): 10-Q_2022-09-30_bhe-20220930.htm
SKIP (exists): 10-Q_2022-06-30_bhe-20220630.htm
SKIP (exists): 10-Q_2022-03-31_bhe-20220331.htm
SKIP (exists): 10-K_2021-12-31_bhe-20211231.htm
SKIP (exists): 10-Q_2021-09-30_bhe-20210930.htm
SKIP (exists): 10-Q_2021-06-30_bhe-20210630.htm
SKIP (exists): 10-Q_2021-03-31_bhe-10q_20210331.htm
SKIP (exists): 10-K_2020-12-31_bhe-10k_20201231.htm
SKIP (exists): 10-Q_2020-09-30_b

In [8]:
import os
import re
from datetime import datetime
BASE_DIR = "benchmark_filings"
files = os.listdir(BASE_DIR)

# Captures the form type and the YYYY-MM-DD date from names shaped like
# "<form>_<date>_<document>". search() is used, so the pair may appear
# anywhere in the name.
pattern = re.compile(r"(10-Q|10-K)_(\d{4}-\d{2}-\d{2})")

# Pair every file name with its regex result; names that do not match are
# dropped when the records are built.
hits = ((name, pattern.search(name)) for name in files)

# One record per recognised file, ordered newest date first.
records = sorted(
    (
        {
            "file": name,
            "form": hit.group(1),
            "date": datetime.strptime(hit.group(2), "%Y-%m-%d"),
        }
        for name, hit in hits
        if hit
    ),
    key=lambda rec: rec["date"],
    reverse=True,
)



In [10]:
# Stop selecting once the credited quarters reach this total.
TARGET_QUARTERS = 12
# Quarters credited per form type; any other form is credited 0.
# NOTE(review): a 10-K plus the 10-Qs of what is presumably the same fiscal
# year is credited 4 + 3 = 7 quarters here. Confirm that this is the intended
# meaning of "12 quarters" and not an overlap.
QUARTERS_PER_FORM = {"10-Q": 1, "10-K": 4}

selected = []
quarter_count = 0

# Walk `records` in the order given (the scan cell sorts it newest-first)
# and keep filings until the target is reached. The filing that crosses the
# threshold is included.
for rec in records:
    quarter_count += QUARTERS_PER_FORM.get(rec["form"], 0)
    selected.append(rec)

    if quarter_count >= TARGET_QUARTERS:
        break

# Running out of records before the target used to pass silently; say so.
if quarter_count < TARGET_QUARTERS:
    print(
        f"WARNING: only {quarter_count} of {TARGET_QUARTERS} quarters "
        f"covered by {len(records)} available filings"
    )

for rec in selected:
    print(rec["form"], rec["date"].date(), rec["file"])


10-Q 2025-09-30 10-Q_2025-09-30_bhe-20250930.htm
10-Q 2025-06-30 10-Q_2025-06-30_bhe-20250630.htm
10-Q 2025-03-31 10-Q_2025-03-31_bhe-20250331.htm
10-K 2024-12-31 10-K_2024-12-31_bhe-20241231.htm
10-Q 2024-09-30 10-Q_2024-09-30_bhe-20240930.htm
10-Q 2024-06-30 10-Q_2024-06-30_bhe-20240630.htm
10-Q 2024-03-31 10-Q_2024-03-31_bhe-20240331.htm
10-K 2023-12-31 10-K_2023-12-31_bhe-20231231.htm
