In [None]:
# Step 1:
import requests

def get_cik(ticker):
    url = f'https://www.sec.gov/files/company_tickers.json'
    response = requests.get(url)
    data = response.json()
    
    for key, value in data.items():
        if value['ticker'].lower() == ticker.lower():
            return str(value['cik_str']).zfill(10)

nvda_cik = get_cik("NVDA")
intc_cik = get_cik("INTC")
print("NVDA CIK:", nvda_cik)
print("INTC CIK:", intc_cik)

def get_10q_links(cik, company_name):
    headers = {'User-Agent': 'aleema.haque@marquette.edu'}  # Replace with your email
    url = f'https://data.sec.gov/submissions/CIK{cik}.json'
    response = requests.get(url, headers=headers)
    data = response.json()

    forms = data['filings']['recent']
    links = []

    for i in range(len(forms['form'])):
        if forms['form'][i] == '10-Q':
            accession = forms['accessionNumber'][i].replace('-', '')
            link = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}/index.json"
            links.append(link)

    print(f"\n{company_name} 10-Q links:")
    for l in links:
        print(l)

get_10q_links(nvda_cik, "NVIDIA")
get_10q_links(intc_cik, "INTEL")



In [4]:
# STEP 2/3
from bs4 import BeautifulSoup

def extract_sections_from_edgar(base_url):
    index_url = base_url + "index.json"
    index = requests.get(index_url, headers=headers).json()

    html_file = ''
    for item in index['directory']['item']:
        if item['name'].endswith('.htm') and '10q' in item['name'].lower():
            html_file = item['name']
            break
    if not html_file:
        return "Not found", "Not found"

    html = requests.get(base_url + html_file, headers=headers).text
    soup = BeautifulSoup(html, 'html.parser')
    text = soup.get_text().lower()

    # Try to extract by keyword (very basic)
    mda_start = text.find("management’s discussion and analysis")
    risk_start = text.find("risk factors")

    mda = text[mda_start:mda_start+3000] if mda_start != -1 else "Not found"
    risk = text[risk_start:risk_start+3000] if risk_start != -1 else "Not found"

    return mda, risk

# Use the first recent filing for each company
nvda_mda, nvda_risk = extract_sections_from_edgar(nvda_links[0])
intc_mda, intc_risk = extract_sections_from_edgar(intc_links[0])



<class 'NameError'>: name 'nvda_links' is not defined

In [None]:
# STEP 4
import openai

openai.api_key = "your-openai-api-key"  # Replace with your OpenAI key

def summarize_and_analyze(text):
    prompt = f"Summarize the following SEC filing text and describe the tone (positive, negative, or neutral):\n\n{text}"
    
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response['choices'][0]['message']['content']

# Summarize and get tone
print("🔍 NVIDIA MD&A Summary:\n", summarize_and_analyze(nvda_mda))
print("\n🔍 Intel MD&A Summary:\n", summarize_and_analyze(intc_mda))


In [None]:
# STEP 5
import yfinance as yf
import matplotlib.pyplot as plt

nvda = yf.download("NVDA", start="2020-01-01")
intc = yf.download("INTC", start="2020-01-01")

plt.figure(figsize=(12, 5))
plt.plot(nvda['Close'], label="NVIDIA", linewidth=2)
plt.plot(intc['Close'], label="Intel", linewidth=2)
plt.title("Stock Price Comparison (2020–Present)")
plt.xlabel("Date")
plt.ylabel("Price (USD)")
plt.legend()
plt.grid(True)
plt.show()
