In [2]:
import pandas as pd
import yfinance as yf
from datetime import datetime

# Define tickers and date range
TICKERS = ['SPY', 'BTC-USD', 'ETH-USD', 'XRP-USD', 'SOL-USD', 'DOGE-USD']
# NOTE(review): Yahoo appears to serve 1h bars only for roughly the last
# 730 days; a fixed START_DATE will eventually fall outside that window and
# the download will come back empty -- confirm and bump the date if needed.
START_DATE = '2023-04-01'
# NOTE(review): yfinance documents `end` as exclusive, so bars stamped with
# today's date are presumably not included -- confirm this is intended.
END_DATE = datetime.now().strftime('%Y-%m-%d')

# Download hourly bars for all tickers in a single request. With
# group_by='ticker' the result has two-level columns (ticker, field), so
# data[ticker] yields one frame of price fields per ticker.
data = yf.download(TICKERS, start=START_DATE, end=END_DATE, interval='1h', group_by='ticker')

# Flatten the MultiIndex DataFrame into long format: one row per
# (timestamp, ticker), with the ticker symbol stored in a 'Ticker' column.
# Frames are collected first and concatenated once; concatenating inside the
# loop would re-copy every previously accumulated row on each iteration.
frames = []
for ticker in TICKERS:
    ticker_data = data[ticker].copy()
    ticker_data['Ticker'] = ticker
    # reset_index() turns the timestamp index into an ordinary column.
    frames.append(ticker_data.reset_index())

# ignore_index=True gives the stacked frame a unique 0..n-1 index instead of
# repeating each ticker's own row numbers.
combined_data = pd.concat(frames, axis=0, ignore_index=True)

# Rename columns for clarity. The timestamp column is expected to be called
# 'Datetime' after reset_index(); fail loudly if the download did not
# produce it rather than writing a malformed file.
if 'Datetime' in combined_data.columns:
    combined_data = combined_data.rename(columns={'Datetime': 'Date'})
else:
    raise KeyError("Column 'Datetime' not found in the downloaded data.")

combined_data['Date'] = pd.to_datetime(combined_data['Date'])  # Ensure Date is datetime

# Remove rows where all price-related columns are NaN (timestamps for which
# a given ticker has no data at all).
price_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
combined_data = combined_data.dropna(subset=price_columns, how='all')

# Persist the raw long-format table; the row index is omitted on purpose.
combined_data.to_csv('../../data/raw_data.csv', index=False)


[*********************100%***********************]  6 of 6 completed
