In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# 定义Magnificent 7股票代码
mag7_tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA"] 

# 设置时间范围（近10年）
end_date = datetime.now()
start_date = end_date - timedelta(days=365*10)

# 下载数据
data = yf.download(mag7_tickers, start=start_date, end=end_date, group_by='ticker')

# 重塑数据为长格式
df_list = []
for ticker in mag7_tickers:
    ticker_data = data[ticker].copy()
    ticker_data['Ticker'] = ticker
    ticker_data.reset_index(inplace=True)
    df_list.append(ticker_data)

# 合并所有数据
final_df = pd.concat(df_list, ignore_index=True)

# 保存到Parquet文件
final_df.to_parquet('mag7_10years_data.parquet', engine='pyarrow', index=False)

print("数据已保存到 mag7_10years_data.parquet")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  7 of 7 completed

7 Failed downloads:
['AAPL', 'NVDA', 'MSFT', 'TSLA', 'AMZN', 'GOOGL', 'META']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


数据已保存到 mag7_10years_data.parquet


In [None]:
import yfinance as yf
import pandas as pd
import time
from datetime import datetime, timedelta
from typing import List

def download_ticker_data(ticker: str, start_date: str, end_date: str, retries: int = 3, delay: int = 30) -> pd.DataFrame:
    """
    Download data for a single ticker with retry logic and delay to handle rate limits.
    
    Args:
        ticker: Stock ticker symbol
        start_date: Start date in 'YYYY-MM-DD' format
        end_date: End date in 'YYYY-MM-DD' format
        retries: Number of retry attempts
        delay: Delay in seconds between retries
    
    Returns:
        DataFrame with stock data
    """
    for attempt in range(retries):
        try:
            # Download data with auto_adjust set explicitly
            data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
            if not data.empty:
                data['Ticker'] = ticker
                return data
            else:
                print(f"No data returned for {ticker}. Retrying ({attempt+1}/{retries})...")
                time.sleep(delay)
        except Exception as e:
            if "Rate limited" in str(e) or "429" in str(e):
                print(f"Rate limit hit for {ticker}. Waiting {delay} seconds... ({attempt+1}/{retries})")
                time.sleep(delay)
            else:
                print(f"Error downloading {ticker}: {e}. Retrying ({attempt+1}/{retries})...")
                time.sleep(delay)
    print(f"Failed to download data for {ticker} after {retries} attempts.")
    return pd.DataFrame()

def download_mag7_data(tickers: List[str], start_date: str, end_date: str) -> pd.DataFrame:
    """
    Download data for multiple tickers and combine into a single DataFrame.
    
    Args:
        tickers: List of ticker symbols
        start_date: Start date in 'YYYY-MM-DD' format
        end_date: End date in 'YYYY-MM-DD' format
    
    Returns:
        Combined DataFrame with all ticker data
    """
    df_list = []
    for ticker in tickers:
        print(f"Downloading data for {ticker}...")
        data = download_ticker_data(ticker, start_date, end_date)
        if not data.empty:
            df_list.append(data)
        time.sleep(30)  # Add delay between ticker downloads to avoid rate limits
    if df_list:
        final_df = pd.concat(df_list, ignore_index=False)
        final_df.reset_index(inplace=True)
        return final_df
    else:
        print("No data downloaded for any tickers.")
        return pd.DataFrame()

# Define MAG7 tickers
mag7_tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA"] 

# Set time range (last 10 years)
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=365*10)).strftime('%Y-%m-%d')

# Download data
data = download_mag7_data(mag7_tickers, start_date, end_date)

# Save to Parquet file if data is available
if not data.empty:
    data.to_parquet('mag7_10years_data.parquet', engine='pyarrow', index=False)
    print("Data saved to mag7_10years_data.parquet")
else:
    print("No data to save.")

# Display sample of data
if not data.empty:
    print("\nSample of downloaded data:")
    print(data.head())

Downloading data for AAPL...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


No data returned for AAPL. Retrying (1/3)...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


No data returned for AAPL. Retrying (2/3)...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


No data returned for AAPL. Retrying (3/3)...
Failed to download data for AAPL after 3 attempts.
