In [252]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import pandas_datareader.data as web
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [224]:
def get_prices(tickers, start, end, types=None, data_source='yahoo', out_path=None, sort_tks=False):
    """Download prices from an external source.

    Args:
        tickers (str or list): The ticker(s) to download. A single string is
            treated as a one-element list.
        start, end (str): The start date and end date of the target period.
        types (str or list, optional): The price type(s) to download. If not
            specified, all types are returned.
        data_source (str): The data source to use for downloading. Only
            'yahoo' is currently supported.
        out_path: If specified, the result is also saved to this path as CSV.
        sort_tks (bool): If true, the tickers are sorted before downloading.

    Returns:
        The price table produced by the source-specific downloader (the
        object returned by ``get_prices_from_yahoo``).

    Raises:
        ValueError: If ``data_source`` is not a supported source.
    """
    supported_sources = ('yahoo',)
    # Fail early with a clear message; otherwise an unknown source would leave
    # `df` unassigned and surface later as an UnboundLocalError.
    if data_source not in supported_sources:
        raise ValueError(
            f"Unsupported data_source {data_source!r}. "
            f"Must be one of {list(supported_sources)}.")
    # Accept a bare string wherever a list is expected.
    if isinstance(tickers, str):
        tickers = [tickers]
    if isinstance(types, str):
        types = [types]
    if sort_tks:
        tickers = sorted(tickers)
    df = get_prices_from_yahoo(tickers, start, end, types)
    if out_path is not None:
        df.to_csv(out_path)
    return df


def get_prices_from_yahoo(tickers, start, end, types=None):
    """Download daily prices from Yahoo!.

    Args:
        tickers (str or list): The ticker(s) to download. A single string is
            treated as a one-element list.
        start, end: The start and end of the target period, passed straight
            through to ``web.DataReader``.
        types (str or list, optional): Price type(s) to keep. Each must be one
            of 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'. If not
            specified, all downloaded types are kept.

    Returns:
        The downloaded table with pre-inception rows blanked for the known
        tickers, filtered to ``types`` and with interior gaps forward-filled
        column by column.

    Raises:
        ValueError: If any entry of ``types`` is not a valid price type.
    """
    valid_types = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
    # Normalise bare strings so they are not iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    if isinstance(types, str):
        types = [types]
    if types is not None and not all(i in valid_types for i in types):
        raise ValueError(
            f"Wrong 'types' provided. Must be chosen from {valid_types}.")
    # download from yahoo
    df = web.DataReader(name=tickers, data_source='yahoo', start=start, end=end)
    # hardcoded 1 day before inception dates (for fixing yahoo data)
    inception_dates = {
        'DOMO': '2018-06-28',
        'PS': '2018-05-16',
        'SMAR': '2018-04-26',
        'TWLO': '2016-06-22',
        'ZUO': '2018-04-11',
        'MB': '2015-06-21',
        'GDDY': '2015-04-15',
        'HDP': '2014-12-14',
        'SHOP': '2015-05-21',
        'TEAM': '2015-12-15',
        'PD': '2019-04-11'
    }
    # fix inception dates: blank every price type of the ticker for all rows
    # up to and including the listed date (columns are addressed as
    # (price type, ticker) pairs)
    for tk in tickers:
        if tk in inception_dates:
            df.loc[:inception_dates[tk], pd.IndexSlice[:, tk]] = np.nan
    # filter types if provided
    if types is not None:
        df = df[types]
    df = df.apply(print_and_fill_gaps)
    # send warnings if no data
    for col_name, col in df.items():
        if col.isna().all():
            print("WARNING: ", col_name,
                  "has no data during the selected period!")
    return df


def print_and_fill_gaps(series):
    """Forward-fill missing values that lie between the first and last valid entry.

    Leading and trailing NaNs (before the first / after the last non-missing
    value) are left untouched. When interior gaps exist, the series name and
    the gap dates (formatted as YYYYMMDD) are printed before filling.

    Args:
        series (pandas.Series): The values to fix. The gap report calls
            ``strftime`` on the index, so a datetime-like index is expected.

    Returns:
        pandas.Series: The input itself if it is entirely NaN, otherwise a
        copy with the interior gaps forward-filled.
    """
    if series.isna().all():
        return series
    s = series.copy()
    trading_idx = s.index[s.notna()]
    first_day = trading_idx.min()
    last_day = trading_idx.max()
    # Restrict to the span between the first and last valid observations so
    # that leading/trailing NaNs are neither reported nor filled.
    s_trading = s.loc[first_day:last_day]
    if s_trading.isna().any():
        print("Gaps found and filled in ", s.name, " :")
        print(s_trading[s_trading.isna()].index.strftime('%Y%m%d').tolist())
        # .ffill() replaces the deprecated fillna(method='ffill').
        s.loc[first_day:last_day] = s_trading.ffill()
    return s

In [225]:
# Single ticker passed as a plain string; get_prices wraps it in a list, and with types left as None no price type is filtered out.
get_prices('AAPL', '2004-01-01', '2004-12-01')

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2004-01-02,0.327807,0.380000,0.388393,0.378214,0.384821,144642400
2004-01-05,0.341517,0.395893,0.399821,0.382500,0.382500,395018400
2004-01-06,0.340285,0.394464,0.400357,0.387679,0.397321,509348000
2004-01-07,0.347987,0.403393,0.407679,0.391607,0.394643,586874400
2004-01-08,0.359849,0.417143,0.423750,0.404464,0.407857,460303200
...,...,...,...,...,...,...
2004-11-24,0.986657,1.143750,1.164286,1.099107,1.101607,1390788000
2004-11-26,0.994359,1.152679,1.174286,1.148929,1.166964,550144000
2004-11-29,1.054282,1.222143,1.242321,1.203750,1.231250,1712916800
2004-11-30,1.032870,1.197321,1.228393,1.197321,1.228393,1028518400


In [226]:
# Same request with the ticker already supplied as a one-element list.
get_prices(['AAPL'], '2004-01-01', '2004-12-01')

Attributes,Adj Close,Close,High,Low,Open,Volume
Symbols,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2004-01-02,0.327807,0.380000,0.388393,0.378214,0.384821,144642400
2004-01-05,0.341517,0.395893,0.399821,0.382500,0.382500,395018400
2004-01-06,0.340285,0.394464,0.400357,0.387679,0.397321,509348000
2004-01-07,0.347987,0.403393,0.407679,0.391607,0.394643,586874400
2004-01-08,0.359849,0.417143,0.423750,0.404464,0.407857,460303200
...,...,...,...,...,...,...
2004-11-24,0.986657,1.143750,1.164286,1.099107,1.101607,1390788000
2004-11-26,0.994359,1.152679,1.174286,1.148929,1.166964,550144000
2004-11-29,1.054282,1.222143,1.242321,1.203750,1.231250,1712916800
2004-11-30,1.032870,1.197321,1.228393,1.197321,1.228393,1028518400


In [228]:
# types passed positionally as a single string: only the 'High' column is kept.
get_prices(['AAPL'], '2004-01-01', '2004-12-01', 'High')

Attributes,High
Symbols,AAPL
Date,Unnamed: 1_level_2
2004-01-02,0.388393
2004-01-05,0.399821
2004-01-06,0.400357
2004-01-07,0.407679
2004-01-08,0.423750
...,...
2004-11-24,1.164286
2004-11-26,1.174286
2004-11-29,1.242321
2004-11-30,1.228393


In [229]:
# types passed as a list: keep the 'Close' and 'High' columns.
get_prices(['AAPL'], '2004-01-01', '2004-12-01', ['Close', 'High'])

Attributes,Close,High
Symbols,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2004-01-02,0.380000,0.388393
2004-01-05,0.395893,0.399821
2004-01-06,0.394464,0.400357
2004-01-07,0.403393,0.407679
2004-01-08,0.417143,0.423750
...,...,...
2004-11-24,1.143750,1.164286
2004-11-26,1.152679,1.174286
2004-11-29,1.222143,1.242321
2004-11-30,1.197321,1.228393


In [230]:
# Two tickers and two price types; sort_tks is left False, so the tickers are requested in the order given.
get_prices(['GOOGL', 'AAPL'], '2004-01-01', '2004-12-01', ['Close', 'High'])

Attributes,Close,Close,High,High
Symbols,GOOGL,AAPL,GOOGL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2004-01-02,,0.380000,,0.388393
2004-01-05,,0.395893,,0.399821
2004-01-06,,0.394464,,0.400357
2004-01-07,,0.403393,,0.407679
2004-01-08,,0.417143,,0.423750
...,...,...,...,...
2004-11-24,87.467468,1.143750,88.693695,1.164286
2004-11-26,89.784782,1.152679,90.105103,1.174286
2004-11-29,90.615616,1.222143,91.566566,1.242321
2004-11-30,91.081078,1.197321,91.591591,1.228393


In [232]:
# Two tickers with types given as a single string.
get_prices(['GOOGL', 'AAPL'], '2004-01-01', '2004-12-01', 'High')

Attributes,High,High
Symbols,GOOGL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2004-01-02,,0.388393
2004-01-05,,0.399821
2004-01-06,,0.400357
2004-01-07,,0.407679
2004-01-08,,0.423750
...,...,...
2004-11-24,88.693695,1.164286
2004-11-26,90.105103,1.174286
2004-11-29,91.566566,1.242321
2004-11-30,91.591591,1.228393


In [247]:
%%time
# Long date range with sort_tks=True (tickers are sorted before download); DOMO, PS and SMAR all appear in the hard-coded inception-date table.
get_prices(['AAPL', 'DOMO', 'PS', 'SMAR'], '1980-01-01', '2020-12-23', ['Adj Close', 'Close'], sort_tks=True)

Wall time: 1.92 s


Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close
Symbols,AAPL,DOMO,PS,SMAR,AAPL,DOMO,PS,SMAR
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1980-12-12,0.101087,,,,0.128348,,,
1980-12-15,0.095813,,,,0.121652,,,
1980-12-16,0.088780,,,,0.112723,,,
1980-12-17,0.090978,,,,0.115513,,,
1980-12-18,0.093615,,,,0.118862,,,
...,...,...,...,...,...,...,...,...
2020-12-16,127.809998,60.459999,20.400000,72.500000,127.809998,60.459999,20.400000,72.500000
2020-12-17,128.699997,66.989998,20.430000,73.400002,128.699997,66.989998,20.430000,73.400002
2020-12-18,126.660004,66.260002,20.540001,72.250000,126.660004,66.260002,20.540001,72.250000
2020-12-21,128.229996,67.570000,20.379999,71.970001,128.229996,67.570000,20.379999,71.970001
