In [None]:
import pandas as pd
import time
import os
import requests
from bs4 import BeautifulSoup
from alpha_vantage.timeseries import TimeSeries
import streamlit as st

ALPHA_VANTAGE_API_KEY = st.secrets["ALPHA_VANTAGE_API_KEY"]

In [4]:
API_KEY = ALPHA_VANTAGE_API_KEY  # üîê Use your real key
ts = TimeSeries(key=API_KEY, output_format='pandas')

os.makedirs("data_alpha", exist_ok=True)

In [5]:
# Get S&P 500 tickers from Wikipedia
def get_sp500_tickers():
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    headers = {'User-Agent': 'Mozilla/5.0'}  # ‚úÖ Bypass 403
    html = requests.get(url, headers=headers).text
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', {'id': 'constituents'})
    df = pd.read_html(str(table))[0]
    tickers = df['Symbol'].tolist()
    return [t.replace('.', '-') for t in tickers]  # e.g., BRK.B ‚Üí BRK-B


In [6]:
# ‚úÖ Full run (limit to 25 for Alpha Vantage free plan)
tickers = get_sp500_tickers()[:25]

# üß™ Or manual test list (comment one out)
# tickers = ['AAPL', 'TSLA', 'GOOGL', 'MSFT', 'META']


  df = pd.read_html(str(table))[0]


In [7]:
def get_stock_data_av(ticker, retries=2):
    for i in range(retries):
        try:
            print(f"üîÑ Fetching: {ticker}")
            data, _ = ts.get_daily(symbol=ticker, outputsize='compact')
            data = data.rename(columns={
                '1. open': 'Open',
                '2. high': 'High',
                '3. low': 'Low',
                '4. close': 'Close',
                '5. volume': 'Volume'
            })
            data['Ticker'] = ticker
            return data.sort_index()
        except KeyboardInterrupt:
            print("‚õî Manual interrupt detected. Stopping...")
            raise
        except Exception as e:
            print(f"‚ö†Ô∏è Error fetching {ticker}: {e}")
            time.sleep(5)
    return pd.DataFrame()



In [8]:
all_data = []
count = 0
max_calls = 25  # free-tier daily limit

for ticker in tickers:
    if count >= max_calls:
        print("‚õî Reached daily call limit.")
        break

    df = get_stock_data_av(ticker)

    if df.empty:
        print(f"‚ùå No data for {ticker}")
        continue

    df.to_csv(f"data_alpha/{ticker}.csv")
    all_data.append(df)
    count += 1
    time.sleep(12)  # avoid hitting rate limit (5 calls/min)



üîÑ Fetching: MMM
üîÑ Fetching: AOS


KeyboardInterrupt: 

In [None]:
if all_data:
    df_final = pd.concat(all_data)
    df_final.to_csv("training_market_data.csv")
    print("‚úÖ Saved training_market_data.csv")
else:
    print("‚ö†Ô∏è No data collected.")

