This code is similar to the Trading Strategy Simulation file, except the weight allocation strategy used here is CAPM tangency portfolio weight optimization.

In [2]:
import pandas as pd
import numpy as np

In [3]:
stock_df = pd.read_csv('yfinanceAPI_monthly3.csv')
stock_df = stock_df[['ticker', 'date2', 'monthly_return']]
stock_df.columns = ['Ticker', 'Date', 'Monthly_return']
stock_df

Unnamed: 0,Ticker,Date,Monthly_return
0,A,2016-01,-9.949780
1,A,2016-02,-0.796819
2,A,2016-03,6.693440
3,A,2016-04,2.982247
4,A,2016-05,12.145643
...,...,...,...
138419,ZVLO,2019-09,0.000000
138420,ZVLO,2019-10,0.000000
138421,ZVLO,2019-11,0.000000
138422,ZVLO,2019-12,0.000000


In [4]:
stock_returns_grouped = stock_df.groupby(['Ticker'])
stock_returns_list = list(stock_returns_grouped)
stock_returns = {}
for i in stock_returns_list:
    stock_returns[i[0]] = i[1]
stock_returns

{'A':    Ticker     Date  Monthly_return
 0       A  2016-01       -9.949780
 1       A  2016-02       -0.796819
 2       A  2016-03        6.693440
 3       A  2016-04        2.982247
 4       A  2016-05       12.145643
 5       A  2016-06       -3.079331
 6       A  2016-07        8.453566
 7       A  2016-08       -2.348822
 8       A  2016-09        0.483155
 9       A  2016-10       -7.475033
 10      A  2016-11        0.941002
 11      A  2016-12        3.892492
 12      A  2017-01        7.484643
 13      A  2017-02        4.758028
 14      A  2017-03        3.316183
 15      A  2017-04        4.123317
 16      A  2017-05        9.609465
 17      A  2017-06       -1.488077
 18      A  2017-07        0.809305
 19      A  2017-08        8.245527
 20      A  2017-09       -0.803485
 21      A  2017-10        6.184058
 22      A  2017-11        1.778613
 23      A  2017-12       -3.064274
 24      A  2018-01        9.646091
 25      A  2018-02       -6.591323
 26      A  2018-03    

In [5]:
risk_free = pd.read_csv('TB3MS.csv')
risk_free

Unnamed: 0,Date,Risk Free Return
0,2016-01,0.02
1,2016-02,0.03
2,2016-03,0.02
3,2016-04,0.02
4,2016-05,0.02
5,2016-06,0.02
6,2016-07,0.03
7,2016-08,0.03
8,2016-09,0.02
9,2016-10,0.03


In [6]:
dow_df = pd.read_csv('DOW30.csv')
dow = list(dow_df['Ticker'])
dow

['MSFT',
 'INTC',
 'KO',
 'AAPL',
 'PG',
 'HD',
 'IBM',
 'HON',
 'NKE',
 'V',
 'CVX',
 'BA',
 'CAT',
 'VZ',
 'MCD',
 'GS',
 'JPM',
 'MMM',
 'TRV',
 'CSCO',
 'MRK',
 'JNJ',
 'AXP',
 'DOW',
 'AMGN',
 'DIS',
 'WMT',
 'UNH',
 'CRM',
 'WBA']

In [7]:
classification_df = pd.read_csv('RF_Prediction.csv')
classification_df

Unnamed: 0,Ticker,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01
0,A,1,1,1,1,1,1
1,AA,1,1,1,1,1,0
2,AAL,1,1,1,1,1,1
3,AAN,1,1,1,1,1,1
4,AAOI,1,0,0,0,1,0
...,...,...,...,...,...,...,...
2206,ZNGA,1,1,1,1,1,1
2207,ZOM,0,0,0,1,0,0
2208,ZSAN,0,0,0,0,0,0
2209,ZTS,1,1,1,1,1,1


In [8]:
months = list(classification_df.columns)[1:]
months

['2019-08', '2019-09', '2019-10', '2019-11', '2019-12', '2020-01']

In [9]:
positives = {}
for month in months:
    positives[month] = list(classification_df['Ticker'][classification_df[month] == 1])
positives

{'2019-08': ['A',
  'AA',
  'AAL',
  'AAN',
  'AAOI',
  'AAP',
  'AAPL',
  'AAWW',
  'AB',
  'ABBV',
  'ABC',
  'ABCB',
  'ABG',
  'ABM',
  'ABMD',
  'ABT',
  'ACAD',
  'ACC',
  'ACCO',
  'ACHC',
  'ACIA',
  'ACN',
  'ACNB',
  'ACOR',
  'ADBE',
  'ADDDF',
  'ADI',
  'ADM',
  'ADP',
  'ADRO',
  'ADS',
  'ADSK',
  'ADSW',
  'ADT',
  'ADTN',
  'ADUS',
  'AEE',
  'AEIS',
  'AEO',
  'AEP',
  'AES',
  'AFI',
  'AFL',
  'AGCO',
  'AGIO',
  'AGO',
  'AGR',
  'AIMC',
  'AIN',
  'AIR',
  'AIRI',
  'AIT',
  'AIZ',
  'AJRD',
  'AJX',
  'AKAM',
  'AKR',
  'ALB',
  'ALGN',
  'ALGT',
  'ALL',
  'ALLE',
  'ALLY',
  'ALOT',
  'ALRM',
  'ALRS',
  'ALSN',
  'ALTR',
  'ALXN',
  'AMAG',
  'AMAT',
  'AMBK',
  'AMCX',
  'AMD',
  'AME',
  'AMED',
  'AMEH',
  'AMGN',
  'AMH',
  'AMNB',
  'AMP',
  'AMRC',
  'AMRS',
  'AMSF',
  'AMT',
  'AMTD',
  'AMWD',
  'AMZN',
  'AN',
  'ANAT',
  'ANDE',
  'ANET',
  'ANF',
  'ANIK',
  'ANSS',
  'ANTM',
  'AOS',
  'AOSL',
  'AP',
  'APA',
  'APD',
  'APEI',
  'APO',
  'APPN',

In [10]:
regression_df = pd.read_csv('VAR_predicted_returns_new.csv')
regression_df

Unnamed: 0,Ticker,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01
0,A,3.633120,1.941639,-0.989184,5.475272,-2.035121,1.343574
1,AA,14.259083,-1.763753,6.228646,6.482264,-0.420609,0.550378
2,AAL,16.110883,1.934161,-2.993551,-31.895237,7.919258,10.497010
3,AAN,1.698000,6.734586,1.934730,36.191153,-18.230439,-3.851623
4,AAP,-9.364314,-6.389783,-19.404428,38.331948,7.888393,12.771770
...,...,...,...,...,...,...,...
1320,ZGNX,2.006088,-15.245307,10.392096,21.964742,1.974457,0.896183
1321,ZION,0.560555,-7.599444,-4.146234,2.135531,8.367711,5.627621
1322,ZNGA,-7.435770,13.148979,10.374427,2.200228,2.926059,-8.774518
1323,ZTS,2.940268,2.485700,4.705163,4.650009,5.501232,0.829577


In [11]:
def volatility(stock_returns, ticker, end):
    df = stock_returns[ticker]
    returns = df[df.Date <= end]['Monthly_return']
    volatility = returns.std()
    return volatility

In [12]:
def mean_return(stock_returns, ticker, end):
    df = stock_returns[ticker]
    returns = df[df.Date <= end]['Monthly_return']
    mean = returns.mean()
    return mean

In [119]:
def sharpe(stock_returns, ticker, date, predicted):
    vol = volatility(stock_returns, ticker, date)
    mean = mean_return(stock_returns, ticker, date)
    #sharpe = predicted / vol
    sharpe = mean / vol
    return sharpe

In [120]:
def previous_month(date):
    year = int(date.split('-')[0])
    month = int(date.split('-')[1])
    if month > 1:
        month = month - 1
    else:
        month = 12
        year = year - 1
    return str(year) + '-' + str(month).zfill(2)

In [121]:
def next_month(date):
    year = int(date.split('-')[0])
    month = int(date.split('-')[1])
    if month < 12:
        month = month + 1
    else:
        month = 1
        year = year + 1
    return str(year) + '-' + str(month).zfill(2)

In [122]:
def top(classification, regression, dow30, date, num, last):
    result = []
    if last == False:
        positives = list(classification.loc[(classification[date] == 1) | (classification[next_month(date)] == 1)]['Ticker'])
    else:
        positives = list(classification.loc[classification[date] == 1]['Ticker'])
    makesense_df = regression[['Ticker', date]].sort_values(date, ascending=False)
    makesense_df = makesense_df[(makesense_df[date] <= 20) & (makesense_df[date] >= 2)]
    makesense_df['sharpe'] = makesense_df.apply(lambda x: sharpe(stock_returns, x['Ticker'], previous_month(date), x[date]), axis=1)
    makesense = list(makesense_df.sort_values('sharpe', ascending=False)['Ticker'])
    #makesense = list(makesense_df.sort_values(date, ascending=False)['Ticker'])
    count = 1
    i = 0
    while count <= num:
        if makesense[i] in positives and makesense[i] not in dow30:
            result.append(makesense[i])
            count += 1
        i = i + 1
    return result

In [123]:
portfolio_stocks_new_by_month = {}
for i in range(len(months)):
    if i != len(months) - 1:
        portfolio_stocks_new_by_month[months[i]] = top(classification_df, regression_df, dow, months[i], 10, False)
    else:
        portfolio_stocks_new_by_month[months[i]] = top(classification_df, regression_df, dow, months[i], 10, True)
portfolio_stocks_new_by_month

{'2019-08': ['HEI',
  'PLNT',
  'MDB',
  'CABO',
  'INTU',
  'MA',
  'OKTA',
  'RCM',
  'PGR',
  'CTAS'],
 '2019-09': ['HEI',
  'CWST',
  'BAND',
  'WM',
  'CABO',
  'MDB',
  'PLNT',
  'MA',
  'TDY',
  'REXR'],
 '2019-10': ['CWST',
  'NEE',
  'WCN',
  'WM',
  'AYX',
  'TDY',
  'CABO',
  'NOVT',
  'REXR',
  'MA'],
 '2019-11': ['CWST',
  'NEE',
  'RSG',
  'WCN',
  'SUI',
  'CABO',
  'TDY',
  'WM',
  'REXR',
  'ELS'],
 '2019-12': ['CWST',
  'WCN',
  'TDY',
  'NOVT',
  'AYX',
  'REXR',
  'ELS',
  'APPF',
  'MSA',
  'PLNT'],
 '2020-01': ['WCN',
  'TDY',
  'MA',
  'MSA',
  'SUI',
  'ADBE',
  'AMD',
  'SPGI',
  'CDNS',
  'OKTA']}

In [124]:
def get_returns(tickers, date, rf, returns):
    df = rf[rf['Date'] < date].set_index('Date')['Risk Free Return']
    for ticker in tickers:
        temp = returns[ticker]
        risky = temp[temp['Date'] < date].set_index('Date')['Monthly_return']
        df = pd.concat([df, risky], axis=1)
    names = copy.deepcopy(tickers)
    names.insert(0, 'RF')
    df.columns = names
    return df

In [None]:
def get_expectations(tickers, date, regression):
    ind=[regression.index[regression['Ticker']==i].tolist() for i in tickers]
    flat_ind=[item for sublist in ind for item in sublist]
    expectations = regression.reindex(flat_ind)[['Ticker', date]].set_index('Ticker').rename_axis([None]).squeeze()
    return expectations

In [133]:
# CAPM tangency portfolio weight optimization
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.base_optimizer import BaseConvexOptimizer
from pypfopt import objective_functions
def portfolio_weights_optimization(tickers, date, rf, returns):
    returns_df = get_returns(tickers, date, rf, returns)
    riskfree_return = returns_df['RF'] / 100
    risky_returns = returns_df.iloc[:, 1:] / 100
    excess_return = risky_returns.subtract(riskfree_return, axis=0)
    mu = excess_return.mean()
    sigma = excess_return.cov()
    ef = EfficientFrontier(mu, sigma)
    #ef.add_objective(objective_functions.L2_reg, gamma=2)
    w = ef.max_sharpe()
    return ef.clean_weights()

In [134]:
# get the portfolio after weight optimization
def portfolio_after_optimization(weights):
    result = {}
    for ticker in weights.keys():
        weight = weights[ticker]
        if weight >= 0.01:
            result[ticker] = weight
    return result

In [135]:
import copy
def portfolio_by_month(stocks_by_month, returns, months, rf, classification, regression):
    result = {}
    result_weights = {}
    ages = {}
    initial = stocks_by_month[months[0]]
    weights = portfolio_weights_optimization(initial, months[0], rf, returns)
    new_list = portfolio_after_optimization(weights)
    initial = list(new_list.keys())
    result[months[0]] = initial
    result_weights[months[0]] = new_list
    for stock in initial:
        ages[stock] = 0
    result[months[0]] = initial
    for i in range(1, len(months)):
        temp = copy.deepcopy(initial)
        month = months[i]
        for stock in temp:
            ages[stock] += 1
        for stock in temp:
            if int(classification[classification['Ticker'] == stock][month]) == 0 and float(regression[regression['Ticker'] == stock][month]) < 0:
                temp.remove(stock)
        for stock in temp:
            if ages[stock] >= 2:
                month_pre = previous_month(month)
                month_prepre = previous_month(month_pre)
                return_df = returns[stock]
                if float(return_df.loc[(return_df['Date'] == month_pre)]['Monthly_return']) < 0 and float(return_df.loc[return_df['Date'] == month_prepre]['Monthly_return']) < 0:
                    temp.remove(stock)
        potential_new = stocks_by_month[month]
        for stock in potential_new:
            if stock not in temp:
                temp.append(stock)
                ages[stock] = 0
        weights = portfolio_weights_optimization(temp, month, rf, returns)
        new_list = portfolio_after_optimization(weights)
        temp = list(new_list.keys())
        result[month] = temp
        result_weights[month] = new_list
        initial = temp
    return result, result_weights

In [136]:
portfolio_tracking = portfolio_by_month(portfolio_stocks_new_by_month, stock_returns, months, risk_free, classification_df, regression_df)
portfolio_tracking

({'2019-08': ['HEI', 'PLNT', 'MDB', 'OKTA', 'RCM'],
  '2019-09': ['PLNT', 'MDB', 'OKTA', 'CWST'],
  '2019-10': ['MDB', 'CWST', 'NEE', 'AYX', 'NOVT'],
  '2019-11': ['MDB', 'CWST', 'NEE', 'AYX', 'NOVT'],
  '2019-12': ['MDB', 'CWST', 'NEE', 'AYX', 'NOVT'],
  '2020-01': ['MDB', 'CWST', 'NEE', 'NOVT', 'AMD', 'OKTA']},
 {'2019-08': {'HEI': 0.10923,
   'PLNT': 0.38476,
   'MDB': 0.23572,
   'OKTA': 0.15562,
   'RCM': 0.11468},
  '2019-09': {'PLNT': 0.06842,
   'MDB': 0.23644,
   'OKTA': 0.15051,
   'CWST': 0.54463},
  '2019-10': {'MDB': 0.14087,
   'CWST': 0.41895,
   'NEE': 0.21682,
   'AYX': 0.0856,
   'NOVT': 0.13775},
  '2019-11': {'MDB': 0.16081,
   'CWST': 0.40492,
   'NEE': 0.23126,
   'AYX': 0.02943,
   'NOVT': 0.17359},
  '2019-12': {'MDB': 0.16597,
   'CWST': 0.41004,
   'NEE': 0.21186,
   'AYX': 0.04358,
   'NOVT': 0.16857},
  '2020-01': {'MDB': 0.12969,
   'CWST': 0.37399,
   'NEE': 0.29655,
   'NOVT': 0.11646,
   'AMD': 0.02789,
   'OKTA': 0.04942}})

In [137]:
def get_actual_returns(tracking, returns):
    result = {}
    for date in tracking.keys():
        tickers = tracking[date]
        temp = {}
        for ticker in tickers:
            df = returns[ticker]
            actual_return = float(df[df['Date'] == date]['Monthly_return'])
            temp[ticker] = actual_return
        result[date] = temp
    return result    
actual_returns = get_actual_returns(portfolio_tracking[0], stock_returns)
actual_returns

{'2019-08': {'HEI': 5.791594280212031,
  'PLNT': -10.233921531884585,
  'MDB': 6.346876316446104,
  'OKTA': -3.309639815373666,
  'RCM': -7.313196199312166},
 '2019-09': {'PLNT': -18.042772367005316,
  'MDB': -20.89816473762015,
  'OKTA': -22.166008628875364,
  'CWST': -5.626376644595643},
 '2019-10': {'MDB': 6.050791071556928,
  'CWST': 1.5137437043945434,
  'NEE': 2.296228276153633,
  'AYX': -14.828260504443382,
  'NOVT': 8.969654578514243},
 '2019-11': {'MDB': 16.38099777340143,
  'CWST': 7.157602479842009,
  'NEE': -1.371655794582738,
  'AYX': 24.07650139813866,
  'NOVT': 4.6378406646628845},
 '2019-12': {'MDB': -11.492936576077485,
  'CWST': -1.4557917330420778,
  'NEE': 3.566842954378391,
  'AYX': -11.855896440762761,
  'NOVT': -5.086926216189591},
 '2020-01': {'MDB': 27.216770685595087,
  'CWST': 12.035631245810707,
  'NEE': 11.467636039627529,
  'NOVT': 6.399814423868588,
  'AMD': 6.3672004580964,
  'OKTA': 15.229261746155887}}

In [138]:
stock_df2 = pd.read_csv('yfinanceAPI_monthly3.csv')
stock_df2 = stock_df2[['ticker', 'date2', 'Adj Close']]
stock_df2.columns = ['Ticker', 'Date', 'Price']
stock_df2

Unnamed: 0,Ticker,Date,Price
0,A,2016-01,36.028095
1,A,2016-02,35.741016
2,A,2016-03,38.133320
3,A,2016-04,39.270550
4,A,2016-05,44.040211
...,...,...,...
138419,ZVLO,2019-09,0.050000
138420,ZVLO,2019-10,0.050000
138421,ZVLO,2019-11,0.050000
138422,ZVLO,2019-12,0.050000


In [139]:
def get_price(prices, ticker, date):
    return float(prices[(prices['Ticker'] == ticker) & (prices['Date'] == date)]['Price'])

In [140]:
def HPR(stock_tracking, weight_tracking, prices):
    portfolio_value = 1000000
    for date in weight_tracking.keys():
        stock_value_start = 0
        stock_value_end = 0
        weights = weight_tracking[date]
        tickers = stock_tracking[date]
        for ticker in tickers:
            weight = weights[ticker]
            price_start = get_price(prices, ticker, previous_month(date))
            price_end = get_price(prices, ticker, date)
            num = int(portfolio_value * weight / price_start)
            value_start = num * price_start
            value_end = num * price_end
            stock_value_start += value_start
            stock_value_end += value_end
        cash = portfolio_value - stock_value_start
        new_value = stock_value_end + cash
        portfolio_value = new_value
    HPR = portfolio_value / 1000000 - 1
    print(HPR)
HPR(portfolio_tracking[0], portfolio_tracking[1], stock_df2)

0.009352390708923242
