In [19]:
# Create Covariance Matrix
# https://medium.com/analytics-vidhya/modern-portfolio-theory-model-implementation-in-python-e416facabf46

In [20]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from IPython.display import display
import pickle
import yfinance as yf

In [21]:
# --- Notebook configuration -------------------------------------------------

# Number of trading days used to annualise daily figures.
TRADING_DAYS_PER_YEAR = 250

# NOTE(review): the original comment reads "%, feb 2021", which suggests this
# value is expressed in percent, whereas the returns computed in this notebook
# come from pct_change() and are plain fractions — confirm the unit before
# mixing the two.
TREASURY_BILL_RATE = 0.04  # %, feb 2021

# Stocks
stocks = [
    'SPY', 'AAPL', 'AMC', 'BB', 'F',
    'GME', 'GRWG', 'MO', 'NIO', 'PLTR',
    'RKT', 'SNDL', 'TLRY', 'TSLA', 'VIAC',
]

# Data Locations
processed_input_dir = '../../Data/ProcessedInputs/'

In [22]:
# Forward declaration: makes the name `Asset` available for type hints before
# the complete class is defined further down in this cell.
class Asset:
    """Empty stand-in, replaced by the full ``Asset`` definition later in this cell."""


def get_log_period_returns(price_history):
    """Return period-over-period returns of the 'Close' column as a column vector.

    Despite the function name, the values are simple percentage changes
    (``pct_change``), not logarithmic returns.

    Parameters
    ----------
    price_history : pandas.DataFrame
        Price table that contains a 'Close' column.

    Returns
    -------
    numpy.ndarray
        Array reshaped to ``(-1, 1)``; the first row of ``pct_change`` (which
        has no previous price to compare against) is dropped.
    """
    closing_prices = price_history['Close']
    # Skip the first entry: it has no predecessor, so pct_change yields NaN there.
    simple_returns = closing_prices.pct_change().iloc[1:]
    return simple_returns.to_numpy().reshape(-1, 1)


# daily_price_history has to at least have a column, called 'Close'
class Asset:
    """A named asset described by its series of daily returns.

    Parameters
    ----------
    name : str
        Label used in the repr (the ticker in this notebook).
    daily_price_history : pandas.DataFrame
        Daily prices; must contain at least a 'Close' column, which is passed
        to ``get_log_period_returns``.

    Attributes
    ----------
    name : str
    daily_returns : numpy.ndarray
        Column vector of daily returns produced by ``get_log_period_returns``.
    expected_daily_return : float
        Arithmetic mean of ``daily_returns``.
    """

    def __init__(self, name, daily_price_history):
        self.name = name
        self.daily_returns = get_log_period_returns(daily_price_history)
        self.expected_daily_return = np.mean(self.daily_returns)

    @property
    def expected_return(self):
        """Annualised expected return: mean daily return times TRADING_DAYS_PER_YEAR."""
        return TRADING_DAYS_PER_YEAR * self.expected_daily_return

    def __repr__(self):
        return f'<Asset name={self.name}, expected return={self.expected_return}>'

    @staticmethod
    def covariance_matrix(assets, trading_days_per_year=None):
        """Annualised covariance matrix of the assets' daily returns.

        Entry ``[i, j]`` is ``trading_days_per_year * Cov(r_i, r_j)``, where the
        covariance is the population covariance ``E[r_i r_j] - E[r_i] E[r_j]``
        of the daily returns. The cross moment is computed element-wise on
        matching observations; averaging the outer product of the two series
        would only give the product of their means and carry no information
        about co-movement.

        Parameters
        ----------
        assets : sequence
            Objects exposing ``daily_returns`` (any array shape; it is
            flattened). Order determines the row/column order of the result.
        trading_days_per_year : int or float, optional
            Annualisation factor. Defaults to the module-level
            ``TRADING_DAYS_PER_YEAR``.

        Returns
        -------
        numpy.ndarray
            Symmetric array of shape ``(len(assets), len(assets))``.

        Notes
        -----
        ``Asset`` stores no dates. When two series differ in length they are
        aligned on their most recent observations (the trailing overlap).
        NOTE(review): this assumes every history ends on the same trading day
        and has no gaps — confirm for the data source. Because each pair may
        use a different window, the matrix is not guaranteed to be positive
        semi-definite when the lengths differ. A pair with no overlapping
        observations yields NaN.
        """
        if trading_days_per_year is None:
            trading_days_per_year = TRADING_DAYS_PER_YEAR

        series = [np.asarray(asset.daily_returns, dtype=float).ravel() for asset in assets]
        n_assets = len(series)
        daily_cov = np.zeros((n_assets, n_assets))

        for i in range(n_assets):
            # The matrix is symmetric, so only the upper triangle is computed.
            for j in range(i, n_assets):
                overlap = min(series[i].size, series[j].size)
                if overlap == 0:
                    pair_cov = np.nan
                else:
                    left = series[i][-overlap:]
                    right = series[j][-overlap:]
                    pair_cov = np.mean(left * right) - left.mean() * right.mean()
                daily_cov[i, j] = pair_cov
                daily_cov[j, i] = pair_cov

        # Daily covariances scale linearly with the number of periods, matching
        # the linear annualisation used for expected_return.
        return trading_days_per_year * daily_cov

In [23]:
def yf_retrieve_data(tickers, start='2011-01-01', end='2021-02-28'):
    """Download the price history of each ticker with ``yf.download``.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols, downloaded one at a time.
    start, end : str
        Date bounds forwarded to ``yf.download``. The defaults reproduce the
        range this notebook was originally run with.

    Returns
    -------
    list of pandas.DataFrame
        One frame per ticker, in the same order as ``tickers``.

    Raises
    ------
    ValueError
        If the download for a ticker comes back empty.
    AssertionError
        If a frame still contains NaNs after an incomplete first row has been
        dropped.
    """
    dataframes = []

    for ticker_name in tickers:
        history = yf.download(ticker_name,
                              start=start,
                              end=end,
                              progress=False)

        # Fail with a clear message instead of an IndexError from .iloc[0] below.
        if history.empty:
            raise ValueError(f'no price history returned for {ticker_name}')

        if history.isnull().any(axis=1).iloc[0]:  # the first row can have NaNs
            history = history.iloc[1:]

        assert not history.isnull().any(axis=None), f'history has NaNs in {ticker_name}'
        dataframes.append(history)

    return dataframes

In [24]:
# `stocks` is defined once in the configuration cell; it is deliberately not
# repeated here so the ticker list cannot drift between cells.
daily_dataframes = yf_retrieve_data(stocks)

# One Asset per ticker, kept in the same order as `stocks`.
assets = tuple(Asset(name, daily_df) for name, daily_df in zip(stocks, daily_dataframes))

In [25]:
# Show the tuple of Asset objects; each entry is rendered through Asset.__repr__.
assets

(<Asset name=SPY, expected return=0.12215818952519739>,
 <Asset name=AAPL, expected return=0.2688027778236298>,
 <Asset name=AMC, expected return=0.3951188691566217>,
 <Asset name=BB, expected return=-0.0010473983140068931>,
 <Asset name=F, expected return=0.009496000126166467>,
 <Asset name=GME, expected return=0.5185553858112569>,
 <Asset name=GRWG, expected return=1.0112964209541084>,
 <Asset name=MO, expected return=0.07796950118545687>,
 <Asset name=NIO, expected return=1.3393710552815843>,
 <Asset name=PLTR, expected return=2.8732210813361774>,
 <Asset name=RKT, expected return=0.26079989389245795>,
 <Asset name=SNDL, expected return=0.4792799126862355>,
 <Asset name=TLRY, expected return=0.9686899755320086>,
 <Asset name=TSLA, expected return=0.6260223544486299>,
 <Asset name=VIAC, expected return=0.1797195836222838>)

In [26]:
# Square (len(assets) x len(assets)) covariance matrix, rows/columns in the order of `assets`.
cov_matrix = Asset.covariance_matrix(assets)

In [27]:
# Write the covariance matrix to the processed-inputs directory as a pickle.
cov_matrix_path = processed_input_dir + 'covariance_matrix.bin'
with open(cov_matrix_path, 'wb') as cov_matrix_file:
    pickle.dump(cov_matrix, cov_matrix_file)

In [28]:
# Annualised expected return of every asset as a column vector, ordered like `assets`.
expected_returns = np.reshape([asset.expected_return for asset in assets], (-1, 1))

In [29]:
# Write the expected-returns vector to the processed-inputs directory as a pickle.
expected_returns_path = processed_input_dir + 'expected_returns.bin'
with open(expected_returns_path, 'wb') as expected_returns_file:
    pickle.dump(expected_returns, expected_returns_file)

In [30]:
# Scratch check: daily percentage change of the 'Open' column for the first
# downloaded ticker, skipping the first row (no previous value to compare to).
daily_dataframes[0]['Open'].pct_change().iloc[1:]

Date
2011-01-04    0.004893
2011-01-05   -0.005890
2011-01-06    0.008769
2011-01-07   -0.001018
2011-01-10   -0.007683
                ...   
2021-02-22   -0.012778
2021-02-23   -0.006201
2021-02-24    0.004341
2021-02-25    0.010561
2021-02-26   -0.015522
Name: Open, Length: 2554, dtype: float64