In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_portfolio import StockPortfolioEnv
import plotly
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts
import matplotlib.pyplot as plt 
%matplotlib inline
import sys
sys.path.append("../FinRL-Library")

import pandas as pd
import numpy as np
import warnings
import six
import matplotlib.pyplot as plt
import scipy.optimize as sco 
from copy import deepcopy
import itertools
from datetime import timedelta
from pandas.tseries.offsets import BDay
warnings.filterwarnings('ignore')
from datetime import date 

import scipy.cluster.hierarchy as sch
import numpy as np
import pandas as pd
from datetime import date
from matplotlib import pyplot as plt
import cvxopt as opt
from cvxopt import blas, solvers


Module "zipline.assets" not found; multipliers will not be applied to position notionals.



In [2]:
def get_price(assets, start_date='1995-01-01', end_date='2030-01-01'):
    """Download close prices for ``assets`` and derive simple daily returns.

    Parameters
    ----------
    assets : list of str
        Ticker symbols forwarded to ``YahooDownloader`` as ``ticker_list``
        (e.g. ``config.DOW_30_TICKER`` or a hand-picked list such as
        ``['SPY', 'QQQ']``).
    start_date, end_date : str, optional
        Bounds of the download window, forwarded to ``YahooDownloader``.
        The defaults reproduce the window that used to be hard-coded here.

    Returns
    -------
    price : pandas.DataFrame
        Close prices with one column per ticker and a ``DatetimeIndex``.
    daily_rts : pandas.DataFrame
        ``price / price.shift(1) - 1``; the first row is NaN by construction.
    """
    raw = YahooDownloader(start_date=start_date,
                          end_date=end_date,
                          ticker_list=assets).fetch_data()

    # Long (date, tic) rows -> wide table with one 'close' column per ticker.
    price = raw.set_index(['date', 'tic'])[['close']].unstack()
    # Drop the outer 'close' level so the columns are just the tickers.
    price.columns = price.columns.droplevel()
    price.index = pd.to_datetime(price.index)
    daily_rts = price / price.shift(1) - 1
    return price, daily_rts


def backtest(price, weights, rf):
    """Backtest a daily weight schedule against daily prices.

    Both ``price`` and ``weights`` need to be daily, date-indexed frames whose
    columns are the asset names.

    Parameters
    ----------
    price : pandas.DataFrame
        Asset prices; simple returns are computed as ``price / price.shift(1) - 1``.
    weights : pandas.DataFrame
        Portfolio weights per asset. Its first and last index values bound the
        backtest period.
    rf : float
        Rate subtracted from the annualised mean return when computing Sharpe.

    Returns
    -------
    pandas.DataFrame
        The per-asset returns over the backtest period plus two extra columns,
        ``'portfolio_returns'`` and ``'portfolio_cum_returns'``.

    Side effects: prints a short summary and draws the cumulative-return curve
    through the ``.iplot`` accessor (which must be available on pandas objects).
    """
    asset_class = list(weights.columns.unique())
    returns = (price / price.shift(1) - 1).resample('B').last()
    # Copy the slice so the derived columns below are written to an
    # independent frame rather than to a view of the resampled returns.
    returns = returns.loc[weights.index[0]:weights.index[-1]].copy()
    weights = weights.resample('B').last()

    # Element-wise product aligns on both date and asset; missing cells sum as 0.
    returns['portfolio_returns'] = (returns * weights).sum(axis=1)
    returns['portfolio_cum_returns'] = (returns['portfolio_returns'] + 1).cumprod() - 1
    # 260 periods per year is used for annualisation.
    port_mean_returns = returns['portfolio_returns'].mean() * 260
    # .iloc[-1]: take the last row by position. A bare [-1] on a date-indexed
    # Series is treated as a label lookup by current pandas.
    port_total_returns = returns['portfolio_cum_returns'].iloc[-1]
    port_sigma = returns['portfolio_returns'].std() * np.sqrt(260)
    port_sharpe = (port_mean_returns - rf) / port_sigma
    print('Asset class:')
    print(asset_class)
    print('Mean returns: {}, total returns: {}, sharpe: {}'.format(port_mean_returns,port_total_returns,port_sharpe))
    returns['portfolio_cum_returns'].iplot(title = 'cumulative returns');
    return returns


def getIVP(cov, **kargs):
    """Inverse-variance portfolio.

    Each asset is weighted by the reciprocal of its variance (the diagonal of
    ``cov``), and the weights are rescaled to sum to one. Extra keyword
    arguments are accepted and ignored.

    Returns a 1-D ``numpy.ndarray`` ordered like the diagonal of ``cov``.
    """
    variances = np.diag(cov)
    inverse = 1. / variances
    return inverse / inverse.sum()


def getClusterVar(cov,cItems):
    """Variance of one cluster under inverse-variance weighting.

    ``cov`` is sliced by label to the rows/columns in ``cItems``; the cluster
    is weighted with ``getIVP`` and the quadratic form w' * cov * w is returned
    as a scalar.
    """
    sub_cov = cov.loc[cItems, cItems]  # covariance restricted to the cluster
    weights = getIVP(sub_cov).reshape(-1, 1)  # column vector
    left = np.dot(weights.T, sub_cov)
    quad_form = np.dot(left, weights)  # 1x1 array
    return quad_form[0, 0]


def getQuasiDiag(link):
    """Order the original items so that clustered items sit next to each other.

    Parameters
    ----------
    link : numpy.ndarray
        Linkage matrix in the layout produced by
        ``scipy.cluster.hierarchy.linkage``: each row is
        ``[child_a, child_b, distance, n_items_in_cluster]`` and the last row
        describes the root cluster.

    Returns
    -------
    list of int
        Indices of the original items in quasi-diagonal order.
    """
    link = link.astype(int)
    sortIx = pd.Series([link[-1, 0], link[-1, 1]])
    numItems = link[-1, 3]  # number of original items
    # Ids >= numItems denote merged clusters; keep expanding until only
    # original items remain.
    while sortIx.max() >= numItems:
        sortIx.index = range(0, sortIx.shape[0] * 2, 2)  # make space (even slots)
        clusters = sortIx[sortIx >= numItems]  # entries that are still clusters
        i = clusters.index
        j = clusters.values - numItems  # row of `link` that formed each cluster
        sortIx.loc[i] = link[j, 0]  # first child replaces the cluster in place
        second = pd.Series(link[j, 1], index=i + 1)  # second child goes in the odd slot
        # pd.concat replaces Series.append, which was removed in pandas 2.0.
        sortIx = pd.concat([sortIx, second]).sort_index()
        sortIx.index = range(sortIx.shape[0])  # re-index
    return sortIx.tolist()


def getRecBipart(cov, sortIx):
    """Compute the HRP allocation by recursive bisection.

    Parameters
    ----------
    cov : pandas.DataFrame
        Covariance matrix, label-indexed on both axes.
    sortIx : list
        Asset labels in quasi-diagonal order.

    Returns
    -------
    pandas.Series
        Float weights indexed by ``sortIx``.
    """
    # Start from float 1.0: the weights are scaled by fractional factors
    # below, and writing floats into an integer Series relies on silent
    # upcasting, which pandas has deprecated.
    w = pd.Series(1.0, index=sortIx)
    cItems = [sortIx]  # initialize all items in one cluster
    while len(cItems) > 0:
        # Split every cluster with more than one item into two halves;
        # singletons drop out, which eventually empties the list.
        cItems = [i[j:k] for i in cItems for j, k in ((0, len(i) // 2), (len(i) // 2, len(i))) if len(i) > 1]  # bi-section
        for i in range(0, len(cItems), 2):  # parse in pairs
            cItems0 = cItems[i]  # cluster 1
            cItems1 = cItems[i + 1]  # cluster 2
            cVar0 = getClusterVar(cov, cItems0)
            cVar1 = getClusterVar(cov, cItems1)
            # The lower-variance half receives the larger share.
            alpha = 1 - cVar0 / (cVar0 + cVar1)
            w.loc[cItems0] *= alpha  # weight 1
            w.loc[cItems1] *= 1 - alpha  # weight 2
    return w


def correlDist(corr):
    """Turn correlations into distances d = sqrt((1 - corr) / 2).

    For correlations in [-1, 1] the result lies in [0, 1]: perfectly
    correlated items are at distance 0, perfectly anti-correlated at 1.
    """
    half_gap = (1 - corr) / 2.
    return half_gap ** .5


def getHRP(cov, corr):
    """Construct a hierarchical risk parity portfolio.

    Steps: convert correlations to distances, run single-linkage clustering on
    them, reorder the assets so clustered ones are adjacent, then allocate by
    recursive bisection. The weights are returned as a Series sorted by label.
    """
    distances = correlDist(corr)
    linkage_matrix = sch.linkage(distances, 'single')
    # To inspect the tree while debugging:
    #   sch.dendrogram(linkage_matrix, labels=cov.index.values, label_rotation=90); plt.show()
    positions = getQuasiDiag(linkage_matrix)
    ordered_labels = corr.index[positions].tolist()  # positions -> asset labels
    allocation = getRecBipart(cov, ordered_labels)
    return allocation.sort_index()

def getMVP(cov):
    """Compute long-only, fully-invested weights via cvxopt quadratic programs.

    Parameters
    ----------
    cov : object exposing ``.T.values`` (e.g. a pandas DataFrame)
        Covariance matrix of the assets.

    Returns
    -------
    list
        The solver's ``'x'`` vector for the final QP, converted with ``list``.

    NOTE(review): ``pbar`` is a vector of ones rather than expected returns
    (the mean-return line is commented out), so every ``returns`` value below
    is the sum of the weights, which the equality constraint pins to 1. The
    quadratic fit is therefore done on near-constant x-values -- confirm this
    is intended before relying on ``x1``.
    """

    cov = cov.T.values
    n = len(cov)
    N = 100
    # 100 scaling factors, log-spaced from 10**-1 up to 10**3.95.
    mus = [10 ** (5.0 * t / N - 1.0) for t in range(N)]

    # Convert to cvxopt matrices
    S = opt.matrix(cov)
    #pbar = opt.matrix(np.mean(returns, axis=1))
    pbar = opt.matrix(np.ones(cov.shape[0]))

    # Create constraint matrices
    # G x <= h with G = -I and h = 0 enforces x >= 0 (no short positions);
    # A x = b with A = ones and b = 1 enforces weights summing to one.
    G = -opt.matrix(np.eye(n))  # negative n x n identity matrix
    h = opt.matrix(0.0, (n, 1))
    A = opt.matrix(1.0, (1, n))
    b = opt.matrix(1.0)

    # Calculate efficient frontier weights using quadratic programming
    # (one QP per scaling factor in `mus`).
    portfolios = [solvers.qp(mu * S, -pbar, G, h, A, b)['x']
                  for mu in mus]
    ## CALCULATE RISKS AND RETURNS FOR FRONTIER
    returns = [blas.dot(pbar, x) for x in portfolios]
    risks = [np.sqrt(blas.dot(x, S * x)) for x in portfolios]
    ## CALCULATE THE 2ND DEGREE POLYNOMIAL OF THE FRONTIER CURVE
    m1 = np.polyfit(returns, risks, 2)
    # Scaling factor derived from the fitted coefficients (constant / quadratic term).
    x1 = np.sqrt(m1[2] / m1[0])
    # CALCULATE THE OPTIMAL PORTFOLIO
    wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']

    return list(wt)

def get_all_portfolios(returns):
    """Compute MVP, IVP and HRP weights from a frame of asset returns.

    The covariance and correlation matrices of ``returns`` feed the three
    allocators. The result has one row per asset and the columns
    ``'MVP'``, ``'IVP'`` and ``'HRP'``.
    """
    cov = returns.cov()
    corr = returns.corr()
    assets = cov.index

    hrp = getHRP(cov, corr)
    # getIVP / getMVP return plain sequences; label them with the asset names.
    ivp = pd.Series(getIVP(cov), index=assets)
    mvp = pd.Series(getMVP(cov), index=assets)

    by_method = pd.DataFrame([mvp, ivp, hrp], index=['MVP', 'IVP', 'HRP'])
    return by_method.T

def hrp_weight_generator(training_window,allocation_window,df):
    """Generate a rolling schedule of HRP weights.

    Parameters
    ----------
    training_window : int
        Number of rows of ``df`` immediately before each rebalance point that
        are used to estimate the weights.
    allocation_window : int
        Step, in rows, between consecutive rebalance points; also the number
        of business days added to the rebalance date to end each block.
    df : pandas.DataFrame
        Asset returns with a date index and one column per asset.

    Returns
    -------
    pandas.DataFrame
        Weights indexed by ``'asofdate'`` with the same columns as ``df``.
        Consecutive blocks share a boundary date; fully identical rows are
        dropped, but a date whose weights differ between the two blocks still
        appears twice.
    """
    blocks = []
    # Loop over the rebalance points implied by the allocation window.
    for i in range(training_window, len(df), allocation_window):
        # Missing returns are replaced by a small constant (0.0002).
        training_df = df.iloc[i - training_window: i].fillna(0.0001*2)
        # NOTE(review): get_all_portfolios also computes the MVP and IVP
        # columns, which are not used here.
        portfolios = get_all_portfolios(training_df)
        hrpdic = dict(zip(portfolios.index, portfolios['HRP']))

        # The same weight vector is repeated on every business day of the block.
        block_index = pd.bdate_range(start=df.index[i],
                                     end=df.index[i] + BDay(allocation_window))
        blocks.append(pd.DataFrame(data=hrpdic,
                                   columns=training_df.columns,
                                   index=block_index))

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0,
    # and pd.concat rejects an empty list, hence the guard.
    all_weights = pd.concat(blocks) if blocks else pd.DataFrame()
    all_weights.index.name = 'asofdate'
    all_weights = all_weights.reset_index().drop_duplicates().set_index('asofdate')
    return all_weights


In [5]:
# Fetch close prices and simple daily returns for the two tickers used below.
price, daily_rts = get_price(['SPY','QQQ'])
# Put both frames on a business-day grid; rows of daily_rts that contain a NaN are dropped.
price, daily_rts =price.resample('B').first(), daily_rts.resample('B').first().dropna()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (12398, 8)


In [6]:
%%capture
# Container for weight tables, keyed by strategy name.
weights = {}
# HRP weights estimated on the previous 7 rows and re-estimated every 7 rows.
weights['hrp'] = hrp_weight_generator(7,7, daily_rts)
# One row per business day; when a date occurs more than once the first non-null value per column wins.
weights['hrp'] = weights['hrp'].resample('B').first()

In [7]:
# Weighted daily return of the two assets under the HRP weights, stored as a new 'portfolio' column.
# NOTE(review): this mutates daily_rts in place; re-running the hrp_weight_generator cell afterwards
# would pass the 'portfolio' column in as if it were an asset.
daily_rts['portfolio'] = pd.Series(weights['hrp']['QQQ']  * daily_rts['QQQ' ]  + weights['hrp']['SPY']  * daily_rts['SPY' ] )

In [8]:
# Backtest the HRP weights from 2016-01-01 onward with rf = 0.02 and display the cumulative-return column.
backtest(price, weights['hrp'].loc['2016-01-01':], 0.02)[['portfolio_cum_returns']]

Asset class:
['QQQ', 'SPY']
Mean returns: 0.18849379412352865, total returns: 1.6709783917149696, sharpe: 0.9103655931851606


tic,portfolio_cum_returns
date,Unnamed: 1_level_1
2016-01-01,0.000000
2016-01-04,0.000000
2016-01-05,-0.000089
2016-01-06,-0.011139
2016-01-07,-0.038625
...,...
2021-09-14,1.730084
2021-09-15,1.751655
2021-09-16,1.750369
2021-09-17,1.720735


In [9]:
# Inspect the most recent rows of the HRP weight table.
weights['hrp'].tail(20)

tic,QQQ,SPY
asofdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-25,0.46237,0.53763
2021-08-26,0.46237,0.53763
2021-08-27,0.46237,0.53763
2021-08-30,0.46237,0.53763
2021-08-31,0.46237,0.53763
2021-09-01,0.344819,0.655181
2021-09-02,0.344819,0.655181
2021-09-03,0.344819,0.655181
2021-09-06,0.344819,0.655181
2021-09-07,0.344819,0.655181


In [10]:
def calculate_actions(old_balance,new_balance, price):
    """Print, per asset, the trade size needed to move to the new balance.

    For every key of ``new_balance`` the asset name is printed, followed by
    ``(new_balance[asset] - old_balance[asset]) / last_price`` where
    ``last_price`` is the final row of ``price[asset]``.

    Parameters
    ----------
    old_balance, new_balance : mapping or DataFrame keyed by asset
        Balances per asset; values may be scalars or Series.
    price : pandas.DataFrame
        Price history with one column per asset.

    Returns
    -------
    None
        Results are only printed.
    """
    for asset in new_balance.keys():
        # .iloc[-1] takes the last row by position; a bare [-1] on a
        # date-indexed Series is treated as a label lookup by current pandas.
        last_price = price[asset].iloc[-1]
        print(asset)
        print((new_balance[asset] - old_balance[asset]) / last_price)



def calculate_weights(balance):
    """Return each asset's share of the row-wise total balance.

    Parameters
    ----------
    balance : pandas.DataFrame
        One column per asset. A column named ``'total'`` is treated as a
        previously computed total and is excluded from both the sum and the
        result.

    Returns
    -------
    dict
        ``{asset: balance[asset] / total}`` with one Series per asset.

    The input frame is not modified, so calling the function repeatedly on
    the same frame gives the same answer.
    """
    assets = [col for col in balance.columns if col != 'total']
    # Total is kept local instead of being written back as a 'total' column.
    total = balance[assets].sum(axis=1)
    return {col: balance[col] / total for col in assets}

In [13]:
# Current balance per asset as a single-row frame stamped with today's date
# (the index therefore changes from day to day).
old_balance =  pd.DataFrame({'QQQ':1462,'SPY': 1302}, index = [date.today()])
asset =list( old_balance.columns )
# Fresh capital added on top of the existing balance.
incoming_capital = 6000
# Series (one row) holding the total to allocate after the top-up.
new_capital = old_balance[asset].sum(axis =  1) + incoming_capital
# Both dicts map asset -> weight taken from the last row of the HRP table.
# NOTE(review): target_weights is identical to latest_weights and is not used in the cells shown here.
target_weights =dict( zip(weights['hrp'].columns,weights['hrp'].iloc[-1]))
latest_weights =dict( zip(weights['hrp'].columns,weights['hrp'].iloc[-1]))
# Desired balance per asset: total capital times the asset's latest weight (one Series per asset).
new_balance  = {x : new_capital*latest_weights[x] for x in latest_weights.keys() }

In [14]:
# Print, per asset, (new balance - old balance) divided by the last available price.
calculate_actions(old_balance,new_balance, price)

QQQ
2021-09-20    7.780121
dtype: float64
SPY
2021-09-20    7.256137
dtype: float64


In [13]:
# Display the target balance per asset.
# NOTE(review): the saved output of this cell is dated 2021-07-27 while the calculate_actions
# output is dated 2021-09-20, so it looks stale -- re-run to confirm.
new_balance

{'QQQ': 2021-07-27    16545.347049
 dtype: float64,
 'SPY': 2021-07-27    11394.102951
 dtype: float64}

In [None]:
# Per-asset allocation as a single-row frame stamped with today's date.
allocation_usd = pd.DataFrame({'QQQ':4979.94,'SPY': 5135.52}, index = [date.today()])

In [None]:
# Row-wise sum over all current columns, stored as a 'total' column.
# NOTE(review): not idempotent -- running this cell a second time would include the existing 'total' in the sum.
allocation_usd['total'] = allocation_usd.sum(axis = 1) 

In [None]:
# Display the allocation table.
allocation_usd

In [None]:
# Convert every column of the allocation table into a share of the 'total' column.
# The loop also visits 'total' itself, which is divided by itself.
allocation_w = allocation_usd.copy()
for i in allocation_usd.columns : 
    allocation_w[i] = allocation_usd[i] / allocation_usd['total']

In [None]:
# Capital to allocate: the current total plus a 3300 top-up. Rebinds new_capital from the earlier cell.
new_capital = allocation_usd['total']  + 3300

In [None]:

# Target balance per asset: capital times the asset's latest weight (latest_weights comes from an earlier cell).
new_balance  = {x : new_capital*latest_weights[x] for x in latest_weights.keys() }

In [None]:
# Display the capital to allocate.
new_capital

In [None]:
# Per asset: (target balance - current allocation) divided by the latest price.
for i in ['QQQ','SPY']: 
    print(i)
    # .iloc[-1] takes the last row by position; a bare [-1] on a date-indexed
    # Series is treated as a label lookup by current pandas.
    print((new_balance[i] - allocation_usd[i])/price[i].iloc[-1])