In [49]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf

In [67]:
import requests
def get_sp500_tickers(filename='./data/sp500_tickers.txt'):
    """Download the S&P500 tickers and keep a local copy of the raw file

    The response body is written unchanged to ``filename``; the returned
    list is parsed from the same response.

    :param filename: str - local path for the downloaded file
        e.g. './data/sp500_tickers.txt' (missing parent folders are created)
    :return sp500_tickers: list of ~500 strs e.g. ['AAPL', 'MSFT', etc.]
    :raises requests.HTTPError: if the server answers with an error status
    """
    # Get file content from GitHub
    sp500_tickers_url = 'https://raw.githubusercontent.com/datasets/s-and-p-500-companies/master/data/constituents_symbols.txt'
    # A timeout keeps the cell from hanging forever on a stalled connection
    request_sp500_tickers = requests.get(sp500_tickers_url, timeout=30)
    # Fail loudly rather than saving/parsing an error page as tickers
    request_sp500_tickers.raise_for_status()
    # Save file locally (create the target folder on a fresh checkout)
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # The context manager guarantees the handle is flushed and closed
    with open(filename, 'wb') as local_file:
        local_file.write(request_sp500_tickers.content)
    print(f'Saved file successfully at {filename}')
    # Return list of company tickers; skip blank lines so a trailing
    # newline in the file does not produce an empty '' ticker
    sp500_tickers = [
        line.strip()
        for line in request_sp500_tickers.text.splitlines()
        if line.strip()
    ]
    return sp500_tickers

In [87]:
class StockUniverse:
    """Pricing data for a set of tickers, with pickle save/load helpers

    Attributes (all None until set by a method):
        tickers: list of ticker strings
        start, end: first / last timestamp of the data
        df: the pricing DataFrame
        filename: path used by the last store_pickle() call
    """

    # One-letter indicator codes accepted by store_pickle() mapped to the
    # top-level column names of the pricing DataFrame
    _INDICATOR_TRANSLATION = {
        'A': 'Adj Close',
        'O': 'Open',
        'H': 'High',
        'L': 'Low',
        'C': 'Close',
        'V': 'Volume'
    }

    def __init__(self, tickers=None, start="2017-01-01", end="2020-10-23"):
        """Create an empty universe, optionally downloading data right away

        :param tickers: optional list of strings e.g. ['AAPL', 'MSFT', etc.]
            When given, download_data() is called with these arguments.
        :param start: str - only used when tickers is given
        :param end: str - only used when tickers is given
        """
        # Define every attribute up front so an unused instance fails with a
        # clear message instead of an AttributeError
        self.tickers = None
        self.start = None
        self.end = None
        self.df = None
        self.filename = None
        if tickers is not None:
            self.download_data(tickers, start=start, end=end)

    @staticmethod
    def _default_pickle_path():
        """Build the date-stamped default pickle path at call time"""
        return f'./data/tickers_{datetime.today().strftime("%Y-%m-%d")}.pkl'

    def download_data(self, tickers, start="2017-01-01", end="2020-10-23"):
        """Using yfinance to download pricing data into self.df

        :param tickers: list of strings e.g. ['AAPL', 'MSFT', etc.]
            A single whitespace-separated string is accepted as well.
        :param start: str - passed through to yf.download
        :param end: str - passed through to yf.download
        :return None
        :raises ValueError: if no ticker is left after dropping blanks
        """
        # A bare string would otherwise be joined character by character
        # ('AAPL' -> 'A A P L')
        if isinstance(tickers, str):
            tickers = tickers.split()
        # Drop blank entries, e.g. the '' left by splitting a file on '\n'
        tickers = [ticker.strip() for ticker in tickers if ticker and ticker.strip()]
        if not tickers:
            raise ValueError('tickers must contain at least one symbol')
        # Store characteristics
        self.tickers = tickers
        self.start = pd.Timestamp(start)
        self.end = pd.Timestamp(end)
        # Download the data
        tickers_str = ' '.join(tickers)
        self.df = yf.download(tickers_str, start=start, end=end)
        return None

    def store_pickle(
        self,
        filename=None,
        indicator_select=('A', 'O', 'H', 'L', 'C', 'V')
    ):
        """Store self.df into a local pickle file

        :param filename: str e.g. 'pricing.pkl'
            Defaults to './data/tickers_<today>.pkl', resolved at call time.
        :param indicator_select: list of str - which indicator(s) to save in the pickle
            Possible strings are:
            A: adj close; O: open; C: close;
            H: high; L: low, V: volume
        :return filename
        :raises RuntimeError: if no data has been downloaded or read yet
        :raises KeyError: if indicator_select contains an unknown code
        """
        if getattr(self, 'df', None) is None:
            raise RuntimeError(
                'No data to store: call download_data() or read_pickle() first'
            )
        # Resolve the default here, not in the signature, so the date is
        # today's and not the day the class was defined
        if filename is None:
            filename = self._default_pickle_path()
        self.filename = filename
        # Select indicator(s) to save in the pickle
        indicator_columns = [
            self._INDICATOR_TRANSLATION[each_selection]
            for each_selection in indicator_select
        ]
        df_to_save = self.df.loc[:, indicator_columns]
        # Save to local (create the target folder on a fresh checkout)
        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        df_to_save.to_pickle(self.filename)
        print(f'Saved file successfully at {filename}')
        return filename

    def read_pickle(
        self,
        filename=None,
        update_characteristics=True
    ):
        """Read a local pickle file into self.df

        :param filename: str e.g. 'pricing.pkl'
            Defaults to './data/tickers_<today>.pkl', resolved at call time.
        :param update_characteristics: bool - when True, refresh self.tickers,
            self.start and self.end from the loaded data
        :return self.df
        """
        if filename is None:
            filename = self._default_pickle_path()
        # Read pickle
        # NOTE: unpickling can execute arbitrary code - only load files that
        # were produced by store_pickle() or come from a trusted source
        self.df = pd.read_pickle(filename)
        # Update characteristics if necessary
        if update_characteristics:
            columns = self.df.columns
            # Tickers live in the second column level; a frame with a single
            # level carries no ticker information, so leave self.tickers alone
            if isinstance(columns, pd.MultiIndex) and columns.nlevels > 1:
                # Each ticker repeats once per indicator: keep the first
                # occurrence only, as a plain list like download_data() stores
                self.tickers = list(dict.fromkeys(columns.get_level_values(1)))
            # Get start and end time (skipped for an empty frame)
            if len(self.df.index) > 0:
                self.start, self.end = self.df.index[0], self.df.index[-1]
        return self.df

In [76]:
# Smoke test with ['AAPL', 'MSFT']: download pricing, pickle the open prices,
# then display the full downloaded frame
sp500_tickers = get_sp500_tickers()
# Construct first, then download explicitly: StockUniverse.__init__ as defined
# in this notebook takes no ticker argument
sp500 = StockUniverse()
sp500.download_data(['AAPL', 'MSFT'])
sp500.store_pickle(indicator_select=['O'])
sp500.df

Saved file successfully at ./data/sp500_tickers.txt
[*********************100%***********************]  2 of 2 completed
Saved file successfully at ./data/tickers_2020-10-23.pkl


Unnamed: 0_level_0,Adj Close,Adj Close,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Unnamed: 0_level_1,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-01-03,27.548414,58.826855,29.037500,62.580002,29.082500,62.840000,28.690001,62.130001,28.950001,62.790001,115127600,20694100
2017-01-04,27.517582,58.563644,29.004999,62.299999,29.127501,62.750000,28.937500,62.119999,28.962500,62.480000,84472400,21340000
2017-01-05,27.657520,58.563644,29.152500,62.299999,29.215000,62.660000,28.952499,62.029999,28.980000,62.189999,88774400,24876000
2017-01-06,27.965857,59.071262,29.477501,62.840000,29.540001,63.150002,29.117500,62.040001,29.195000,62.299999,127007600,19922900
2017-01-09,28.222006,58.883255,29.747499,62.639999,29.857500,63.080002,29.485001,62.540001,29.487499,62.759998,134247600,20382700
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-16,119.019997,219.660004,119.019997,219.660004,121.550003,222.289993,118.809998,219.320007,121.279999,220.149994,115393800,26057900
2020-10-19,115.980003,214.220001,115.980003,214.220001,120.419998,222.300003,115.660004,213.720001,119.959999,220.419998,120639300,27625800
2020-10-20,117.510002,214.649994,117.510002,214.649994,118.980003,217.369995,115.629997,213.089996,116.199997,215.800003,124423700,22753500
2020-10-21,116.870003,214.800003,116.870003,214.800003,118.709999,216.919998,116.449997,213.119995,116.669998,213.119995,89946000,22724900


In [88]:
# Test out read_pickle(): load the default, date-stamped pickle file into a
# fresh StockUniverse and display the resulting DataFrame.
# NOTE(review): this relies on a pickle already existing at the default path
# (presumably written by store_pickle() with its default filename) - confirm
# before Restart & Run All.
StockUniverse().read_pickle()

Unnamed: 0_level_0,Adj Close,Adj Close,Open,Open,High,High,Low,Low,Close,Close,Volume,Volume
Unnamed: 0_level_1,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT,AAPL,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-01-03,27.548414,58.826855,28.950001,62.790001,29.082500,62.840000,28.690001,62.130001,29.037500,62.580002,115127600,20694100
2017-01-04,27.517582,58.563644,28.962500,62.480000,29.127501,62.750000,28.937500,62.119999,29.004999,62.299999,84472400,21340000
2017-01-05,27.657520,58.563644,28.980000,62.189999,29.215000,62.660000,28.952499,62.029999,29.152500,62.299999,88774400,24876000
2017-01-06,27.965857,59.071262,29.195000,62.299999,29.540001,63.150002,29.117500,62.040001,29.477501,62.840000,127007600,19922900
2017-01-09,28.222006,58.883255,29.487499,62.759998,29.857500,63.080002,29.485001,62.540001,29.747499,62.639999,134247600,20382700
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-16,119.019997,219.660004,121.279999,220.149994,121.550003,222.289993,118.809998,219.320007,119.019997,219.660004,115393800,26057900
2020-10-19,115.980003,214.220001,119.959999,220.419998,120.419998,222.300003,115.660004,213.720001,115.980003,214.220001,120639300,27625800
2020-10-20,117.510002,214.649994,116.199997,215.800003,118.980003,217.369995,115.629997,213.089996,117.510002,214.649994,124423700,22753500
2020-10-21,116.870003,214.800003,116.669998,213.119995,118.709999,216.919998,116.449997,213.119995,116.870003,214.800003,89946000,22724900
