This script fetches the following data from the Alpha Vantage API:

For 50 of the most traded tickers/companies:

 - news articles (from 2019 onwards, where available)
 - earnings call transcripts (Q1–Q4 2024)
 - daily stock prices (latest 100 daily data points)

News articles in specific topics:
 - ‘ipo’, ‘earnings’, ‘mergers_and_acquisitions’, ‘economy_macro’, ‘economy_fiscal’, ‘economy_monetary’, ‘technology’, ‘finance’

Economic indicators (I think it's only the most recent data):
 - GDP, unemployment, inflation, interest rates

Data is in raw JSON format and needs extraction and reorganizing before it can be used as model input.

In [2]:
import os
import time

import pandas as pd
import requests

# List of stock tickers and topics
# Symbols queried for prices, news and earnings-call transcripts.
tickers = [
    "SPY", "QQQ", "DIA", "AAPL", "MSFT", "NVDA", "GOOGL", "GOOG", "SAP", "META",
    "PLTR", "AMD", "INTC", "TSLA", "AMZN", "WMT", "COST", "JPM", "GS", "MS",
    "HOOD", "UNH", "PFE", "TEM", "ACN", "IBM", "XOM", "SHEL", "CVX", "BRK-A",
    "MSTR", "NFLX", "V", "CSCO", "LLY", "TME", "NVO", "NOW", "SNOW", "HD",
    "COR", "HMC", "PANW", "MRK", "ORCL", "BAC", "MA", "RDFN", "VNET", "AVGO",
]
# Values passed as the `topics` parameter of the NEWS_SENTIMENT endpoint.
topics = [
    "ipo",
    "earnings",
    "mergers_and_acquisitions",
    "economy_macro",
    "economy_fiscal",
    "economy_monetary",
    "technology",
    "finance",
]

# Prefer a key supplied through the environment so the secret does not have to
# live in source control. The literal is kept only as a fallback so existing
# runs keep working.
# NOTE(review): this key is committed in source; consider rotating it.
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "83OBSO8TRSM5ASNS")

# Helper functions
def get_news_from_alphavantage(ticker):
    """Fetch NEWS_SENTIMENT articles for a single ticker.

    The symbols SPY, QQQ and DIA are skipped without issuing a request.

    Args:
        ticker: Symbol sent as the ``tickers`` query parameter.

    Returns:
        The ``feed`` list from the JSON response, or an empty list when the
        ticker is skipped, the request fails, the status is not 200, the body
        is not valid JSON, or the payload has no ``feed`` key.
    """
    if ticker in ("SPY", "QQQ", "DIA"):
        return []
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": ticker,
        "time_from": "20190101T0000",
        "limit": 1000,
        "apikey": ALPHA_VANTAGE_API_KEY
    }
    try:
        # Without a timeout a stalled connection would block the whole run.
        r = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Treat a network failure like a non-200 reply so that one bad
        # request does not abort the collection before anything is saved.
        print(f"News request failed for {ticker}: {exc}")
        return []
    if r.status_code != 200:
        return []
    try:
        payload = r.json()
    except ValueError as exc:
        print(f"News response for {ticker} was not valid JSON: {exc}")
        return []
    if not isinstance(payload, dict):
        return []
    return payload.get("feed", [])

def get_news_by_topic_from_alphavantage(topic):
    """Fetch NEWS_SENTIMENT articles for a single topic.

    Args:
        topic: Value sent as the ``topics`` query parameter.

    Returns:
        The ``feed`` list from the JSON response, or an empty list when the
        request fails, the status is not 200, the body is not valid JSON, or
        the payload has no ``feed`` key.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "topics": topic,
        "time_from": "20190101T0000",
        "limit": 1000,
        "apikey": ALPHA_VANTAGE_API_KEY
    }
    try:
        # Without a timeout a stalled connection would block the whole run.
        r = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Treat a network failure like a non-200 reply so that one bad
        # request does not abort the collection before anything is saved.
        print(f"Topic news request failed for {topic}: {exc}")
        return []
    if r.status_code != 200:
        return []
    try:
        payload = r.json()
    except ValueError as exc:
        print(f"Topic news response for {topic} was not valid JSON: {exc}")
        return []
    if not isinstance(payload, dict):
        return []
    return payload.get("feed", [])

def get_stock_prices_from_alphavantage(ticker):
    """Fetch daily price bars for a ticker via TIME_SERIES_DAILY (compact).

    Args:
        ticker: Symbol sent as the ``symbol`` query parameter.

    Returns:
        A list of dicts with keys ``ticker``, ``date``, ``open``, ``high``,
        ``low``, ``close`` (floats) and ``volume`` (int), one per entry of the
        response's ``"Time Series (Daily)"`` mapping. The list is empty when
        the request fails, the status is not 200, the body is not valid JSON,
        or that mapping is absent.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": ticker,
        "outputsize": "compact",
        "apikey": ALPHA_VANTAGE_API_KEY
    }
    try:
        # Without a timeout a stalled connection would block the whole run.
        r = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Treat a network failure like a non-200 reply so that one bad
        # request does not abort the collection before anything is saved.
        print(f"Price request failed for {ticker}: {exc}")
        return []
    if r.status_code != 200:
        return []
    try:
        payload = r.json()
    except ValueError as exc:
        print(f"Price response for {ticker} was not valid JSON: {exc}")
        return []
    if not isinstance(payload, dict):
        return []
    raw = payload.get("Time Series (Daily)", {})
    return [{
        "ticker": ticker,
        "date": date,
        "open": float(values["1. open"]),
        "high": float(values["2. high"]),
        "low": float(values["3. low"]),
        "close": float(values["4. close"]),
        "volume": int(values["5. volume"])
    } for date, values in raw.items()]

def get_earnings_from_alphavantage(ticker):
    """Fetch 2024 earnings-call transcript records for a ticker.

    One EARNINGS_CALL_TRANSCRIPT request is made per quarter, 2024Q4 down to
    2024Q1. The symbols SPY, QQQ and DIA are skipped without any request.

    Args:
        ticker: Symbol sent as the ``symbol`` query parameter.

    Returns:
        A list of dicts with keys ``ticker``, ``quarter``, ``summary`` and
        ``content``, one per quarter whose response was accepted. Quarters
        whose request fails, returns a non-200 status, or returns invalid or
        unexpected JSON are skipped.
    """
    if ticker in ("SPY", "QQQ", "DIA"):
        return []
    base_url = "https://www.alphavantage.co/query"
    quarters = ['2024Q4', '2024Q3', '2024Q2', '2024Q1']
    earnings = []
    for q in quarters:
        params = {
            "function": "EARNINGS_CALL_TRANSCRIPT",
            "symbol": ticker,
            "quarter": q,
            "apikey": ALPHA_VANTAGE_API_KEY
        }
        try:
            # Without a timeout a stalled connection would block the whole run.
            r = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            # Skip only this quarter; the remaining quarters are still tried.
            print(f"Earnings request failed for {ticker} {q}: {exc}")
            continue
        if r.status_code != 200:
            continue
        try:
            data = r.json()
        except ValueError as exc:
            print(f"Earnings response for {ticker} {q} was not valid JSON: {exc}")
            continue
        if not isinstance(data, dict):
            continue
        # NOTE(review): only payloads containing both 'symbol' and 'summary'
        # are kept. Confirm the endpoint really returns a 'summary' key;
        # otherwise every quarter is silently dropped here.
        if 'symbol' in data and 'summary' in data:
            earnings.append({
                "ticker": ticker,
                "quarter": q,
                "summary": data.get("summary", ""),
                "content": data.get("content", "")
            })
    return earnings

def get_economic_indicators():
    """Fetch the REAL_GDP, FEDERAL_FUNDS_RATE, INFLATION and UNEMPLOYMENT series.

    One request is made per indicator, and every item of the response's
    ``data`` list becomes one output row.

    Returns:
        A list of dicts with keys ``indicator``, ``date`` and ``value``.
        Indicators whose request fails, returns a non-200 status, or returns
        invalid or unexpected JSON contribute no rows.
    """
    indicators = ["REAL_GDP", "FEDERAL_FUNDS_RATE", "INFLATION", "UNEMPLOYMENT"]
    url = "https://www.alphavantage.co/query"
    rows = []
    for indicator in indicators:
        params = {"function": indicator, "apikey": ALPHA_VANTAGE_API_KEY}
        try:
            # Without a timeout a stalled connection would block the whole run.
            r = requests.get(url, params=params, timeout=30)
        except requests.RequestException as exc:
            # Skip only this indicator; the remaining ones are still fetched.
            print(f"Indicator request failed for {indicator}: {exc}")
            continue
        if r.status_code != 200:
            continue
        try:
            payload = r.json()
        except ValueError as exc:
            print(f"Indicator response for {indicator} was not valid JSON: {exc}")
            continue
        if not isinstance(payload, dict):
            continue
        rows.extend(
            {
                "indicator": indicator,
                "date": item.get("date"),
                "value": item.get("value"),
            }
            for item in payload.get("data", [])
        )
    return rows

# Data collection
# Accumulators for every dataset; all_topic_news is filled by the topic loop.
all_prices, all_news, all_earnings, all_topic_news = [], [], [], []

for ticker in tickers:
    print(f"Fetching {ticker}...")
    # Same request order as before: prices, then news, then earnings.
    prices = get_stock_prices_from_alphavantage(ticker)
    news = get_news_from_alphavantage(ticker)
    earnings = get_earnings_from_alphavantage(ticker)

    all_prices += prices
    # Keep only the fields needed downstream and tag each row with its ticker.
    all_news += [
        dict(
            ticker=ticker,
            title=item.get("title"),
            summary=item.get("summary"),
            sentiment=item.get("overall_sentiment_label"),
            score=item.get("overall_sentiment_score"),
            date=item.get("time_published"),
        )
        for item in news
    ]
    all_earnings += earnings
    # Pause between tickers before issuing the next batch of requests.
    time.sleep(15)

# Topics
for topic in topics:
    print(f"Fetching topic: {topic}...")
    news = get_news_by_topic_from_alphavantage(topic)
    # Keep only the fields needed downstream and tag each row with its topic.
    all_topic_news += [
        dict(
            topic=topic,
            title=item.get("title"),
            summary=item.get("summary"),
            sentiment=item.get("overall_sentiment_label"),
            score=item.get("overall_sentiment_score"),
            date=item.get("time_published"),
        )
        for item in news
    ]
    # Pause between topics before issuing the next request.
    time.sleep(10)

# Economic indicators
economic_data = get_economic_indicators()

# Save to CSV files
# Each dataset is written to its own file, in the same order as before.
for csv_name, rows in (
    ("alpha_stock_prices.csv", all_prices),
    ("alpha_ticker_news.csv", all_news),
    ("alpha_earnings_calls.csv", all_earnings),
    ("alpha_topic_news.csv", all_topic_news),
    ("alpha_economic_indicators.csv", economic_data),
):
    pd.DataFrame(rows).to_csv(csv_name, index=False)

print("✅ All data saved to CSV files.")



Fetching SPY...
Fetching QQQ...
Fetching DIA...
Fetching AAPL...
Fetching MSFT...
Fetching NVDA...
Fetching GOOGL...
Fetching GOOG...
Fetching SAP...
Fetching META...
Fetching PLTR...
Fetching AMD...
Fetching INTC...
Fetching TSLA...
Fetching AMZN...
Fetching WMT...
Fetching COST...
Fetching JPM...
Fetching GS...
Fetching MS...
Fetching HOOD...
Fetching UNH...
Fetching PFE...
Fetching TEM...
Fetching ACN...
Fetching IBM...
Fetching XOM...
Fetching SHEL...
Fetching CVX...
Fetching BRK-A...
Fetching MSTR...
Fetching NFLX...
Fetching V...
Fetching CSCO...
Fetching LLY...
Fetching TME...
Fetching NVO...
Fetching NOW...
Fetching SNOW...
Fetching HD...
Fetching COR...
Fetching HMC...
Fetching PANW...
Fetching MRK...
Fetching ORCL...
Fetching BAC...
Fetching MA...
Fetching RDFN...
Fetching VNET...
Fetching AVGO...
Fetching topic: ipo...
Fetching topic: earnings...
Fetching topic: mergers_and_acquisitions...
Fetching topic: economy_macro...
Fetching topic: economy_fiscal...
Fetching topic: eco