## 패키지 설치

In [7]:
!pip install -U financedatabase
!pip install -U yahoofinancials
!pip install -U zipline-reloaded

## env 설정

In [7]:
import financedatabase as fd
from yahoofinancials import YahooFinancials
from datetime import datetime, timedelta
from zipline.utils.calendar_utils import get_calendar
import pandas as pd
from tqdm import tqdm
import os

## ETF 리스트 추출 with fd

In [2]:
# Load the financedatabase ETF table and keep only its index labels;
# these are the symbols the download loop iterates over.
etfs = fd.ETFs()
symbols = etfs.select().index.tolist()

## ETF 데이터베이스 확인 (Yahoo)

In [8]:
# Download window as 'YYYY-MM-DD' strings: from 20 * 365 days ago through yesterday.
one_day = timedelta(days=1)
end_date = (datetime.today() - one_day).strftime('%Y-%m-%d')
start_date = (datetime.today() - 365 * 20 * one_day).strftime('%Y-%m-%d')
print(f'{start_date}-{end_date}')

# Sessions of the "XNYS" calendar inside the window; the download loop aligns
# every price table to this index.
trading_calendar = get_calendar("XNYS")
trading_days = trading_calendar.sessions_in_range(
    pd.Timestamp(start_date),
    pd.Timestamp(end_date),
)

2004-01-02-2023-12-27


In [9]:
# Directory (under the current working directory) that receives one CSV per symbol.
folder_name = 'data'
folder_path = os.path.join(os.getcwd(), folder_name)
# exist_ok=True makes the cell safe to re-run and avoids the gap between a
# separate "exists?" check and the directory creation.
os.makedirs(folder_path, exist_ok=True)

In [21]:
def _to_session_frame(price_records, sessions):
    """Build a daily OHLCV table aligned to ``sessions`` from Yahoo price records.

    Parameters
    ----------
    price_records
        Passed straight to ``pd.DataFrame``; the result must contain the columns
        'date', 'formatted_date', 'open', 'high', 'low', 'close', 'adjclose'
        and 'volume'.
    sessions
        Index of dates the output is aligned to.

    Returns
    -------
    pd.DataFrame
        Columns Date, Open, High, Low, Close, Adj Close, Volume. Sessions with no
        record are forward-filled from the previous record; rows that still hold
        a missing value (e.g. sessions before the first record) are dropped.

    Raises
    ------
    KeyError
        If an expected column is missing from ``price_records``.
    """
    prices = (
        pd.DataFrame(price_records)
        .drop(['date'], axis=1)
        .rename(columns={'high':'High', 'low':'Low', 'open':'Open', 'close':'Close', 'volume':'Volume', 'adjclose':'Adj Close','formatted_date':'Date'})
    )
    prices = prices[['Date','Open','High','Low','Close','Adj Close','Volume']].set_index("Date")
    prices.index = pd.to_datetime(prices.index)

    # reindex aligns directly on the session dates and keeps numeric dtypes,
    # instead of copying values into an all-object placeholder frame.
    # rename_axis returns a new object, so the shared `sessions` index is not renamed.
    aligned = prices.reindex(sessions).rename_axis('Date')
    return aligned.ffill().reset_index().dropna()


error_symbol = []
# symbol -> repr of the exception that made the download fail (for inspection).
error_reasons = {}

for symbol in tqdm(symbols):
    try:
        yahoo_financials = YahooFinancials(symbol)
        raw_prices = yahoo_financials.get_historical_price_data(start_date, end_date, 'daily')
        # The response is looked up by symbol, then by the 'prices' key.
        df = _to_session_frame(raw_prices.get(symbol).get('prices'), trading_days)
        # Write into the folder that was actually created, independent of the cwd.
        df.to_csv(os.path.join(folder_path, f"{symbol}.csv"), index=False)
    except Exception as e:
        # Best effort: one bad symbol must not abort the whole run, but keep the
        # reason so failures can be diagnosed afterwards.
        error_symbol.append(symbol)
        error_reasons[symbol] = repr(e)

# Save the symbols that could not be downloaded or processed.
df_err = pd.DataFrame({"Symbol": error_symbol})
csv_file_path = "error_symbols.csv"
df_err.to_csv(csv_file_path, index=False)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00,  7.67it/s]


## Save ETF data

In [None]:
from google.colab import files
!zip -r /content/data.zip /content/data
files.download('/content/data.zip')
files.download('/content/error_symbols.csv')

---