## 패키지 설치

In [7]:
!pip install -U financedatabase
!pip install -U yahoofinancials

!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
%cd ta-lib
!./configure --prefix=/usr
!make
!make install
!pip install Ta-Lib
%cd ..

!pip install -U zipline-reloaded

## env 설정

In [2]:
import financedatabase as fd
from yahoofinancials import YahooFinancials
from datetime import datetime, timedelta
from zipline.utils.calendar_utils import get_calendar
import pandas as pd
from tqdm import tqdm
import os

In [12]:
class PriceDownloader:
    """Download daily Yahoo Finance price history and write one CSV per symbol.

    Each CSV has the columns Date, Open, High, Low, Close, Adj Close and Volume,
    with one row per session of the XNYS trading calendar. Sessions for which
    Yahoo returned no row are forward-filled from the previous session, and
    leading sessions that precede the first available price are dropped.
    """

    # Yahoo's field names -> the column names written to the CSV files.
    _COLUMN_NAMES = {'high': 'High', 'low': 'Low', 'open': 'Open', 'close': 'Close',
                     'volume': 'Volume', 'adjclose': 'Adj Close', 'formatted_date': 'Date'}
    # Column order of the CSV files.
    _COLUMN_ORDER = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    def __init__(self):
        pass

    def _create_folder(self, folder_name):
        """Create ``folder_name`` under the current working directory if needed.

        Args:
            folder_name: Folder name or relative path; nested paths such as
                ``'data/equities'`` are created in one go.

        Returns:
            The path of the folder, joined onto the current working directory.
        """
        folder_path = os.path.join(os.getcwd(), folder_name)
        # makedirs handles nested paths and an already existing folder,
        # which the plain exists()/mkdir() pair did not.
        os.makedirs(folder_path, exist_ok=True)
        return folder_path

    def _get_trading_days(self, start_date=None, end_date=None):
        """Return the XNYS calendar sessions between two dates.

        Args:
            start_date: 'YYYY-MM-DD' string; defaults to 365 * 20 days before today.
            end_date: 'YYYY-MM-DD' string; defaults to yesterday.

        Returns:
            The sessions returned by the calendar's ``sessions_in_range``.
        """
        if start_date is None:
            start_date = (datetime.today() - timedelta(days=365 * 20)).strftime('%Y-%m-%d')
        if end_date is None:
            end_date = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')

        trading_calendar = get_calendar("XNYS")  # "NYSE" can also be used as the calendar name
        trading_days = trading_calendar.sessions_in_range(pd.Timestamp(start_date), pd.Timestamp(end_date))

        return trading_days

    def _load_prices(self, symbol, start_date, end_date, trading_days):
        """Fetch one symbol from Yahoo and align its prices to ``trading_days``.

        Any exception raised while fetching or reshaping (for example when
        Yahoo returns no data for the symbol) propagates to the caller.

        Returns:
            DataFrame with a 'Date' column followed by the price columns.
        """
        response = YahooFinancials(symbol).get_historical_price_data(start_date, end_date, 'daily')
        prices = pd.DataFrame(response.get(symbol).get('prices')).drop(['date'], axis=1)
        prices = prices.rename(columns=self._COLUMN_NAMES)[self._COLUMN_ORDER].set_index('Date')
        prices.index = pd.to_datetime(prices.index)

        # One row per trading session; reindex keeps the numeric dtypes instead of
        # going through an all-NaN object-dtype frame.
        # NOTE(review): assumes the calendar sessions are tz-naive like Yahoo's dates - confirm.
        aligned = prices.reindex(trading_days).rename_axis('Date')
        # Forward-fill gaps, then drop the leading sessions that have no price yet.
        return aligned.ffill().reset_index().dropna()

    def download_bundle_prices(self, symbols, start_date=None, end_date=None, folder_name='data', save_error=True):
        """Download daily prices for ``symbols`` and save one CSV per symbol.

        Args:
            symbols: Iterable of ticker symbols.
            start_date: 'YYYY-MM-DD' string; see ``_get_trading_days`` for the default.
            end_date: 'YYYY-MM-DD' string; see ``_get_trading_days`` for the default.
            folder_name: Output folder, created under the current working directory.
            save_error: When true, write ``0_error_symbols.csv`` into the output
                folder listing every symbol that failed ('Symbol') and why ('Error').

        Returns:
            None. Results are written to disk.
        """
        folder_path = self._create_folder(folder_name)
        trading_days = self._get_trading_days(start_date=start_date, end_date=end_date)
        # Snap the requested range to actual sessions before querying Yahoo.
        start_date = trading_days[0].strftime('%Y-%m-%d')
        end_date = trading_days[-1].strftime('%Y-%m-%d')

        errors = []
        for symbol in tqdm(symbols, desc='Downloading data'):
            try:
                df = self._load_prices(symbol, start_date, end_date, trading_days)
                df.to_csv(os.path.join(folder_path, f"{symbol}.csv"), index=False)
            except Exception as e:
                # Best effort: one bad symbol must not abort the whole run, but keep
                # the reason so failures can be diagnosed from the error file.
                errors.append((symbol, f"{type(e).__name__}: {e}"))

        if save_error:
            df_err = pd.DataFrame(errors, columns=["Symbol", "Error"])
            df_err.to_csv(os.path.join(folder_path, "0_error_symbols.csv"), index=False)

## bundle 데이터 받기 (Yahoo)

In [13]:
# Downloader instance shared by the equity and ETF download cells of this notebook.
pdn = PriceDownloader()

### US equities data

In [1]:
# Load the financedatabase equities table and take the index of the rows
# selected with country='United States' as the symbols to download.
equities = fd.Equities() 
equity_symbols = equities.select(country='United States').index # symbol list
# Writes one CSV per symbol into the 'equity_data' folder under the working directory.
pdn.download_bundle_prices(equity_symbols, folder_name='equity_data')

#### Save equity data

In [None]:
# Colab-only: zip the equity_data folder and download the archive.
# The paths are absolute, so this only finds the data if the download cell
# ran with /content as the working directory.
from google.colab import files
!zip -r /content/equity_data.zip /content/equity_data
files.download('/content/equity_data.zip')

### US ETF data

In [None]:
# Load the financedatabase ETF table and collect its index as the symbol list.
etfs = fd.ETFs()
etf_symbols = list(etfs.select().index) # symbol list
# Download the ETF symbols built on the line above into the 'etf_data' folder.
pdn.download_bundle_prices(etf_symbols, folder_name='etf_data')

#### Save ETF data

In [None]:
# Colab-only: zip the etf_data folder and download the archive.
# The paths are absolute, so this only finds the data if the download cell
# ran with /content as the working directory.
from google.colab import files
!zip -r /content/etf_data.zip /content/etf_data
files.download('/content/etf_data.zip')

---