In [43]:
import os
import scipy
import numpy as np
import scipy.io
import pandas as pd
import time

In [22]:
def format_appro(f, data_folder = 'data', ext = '.mat'):
    if not '/' in f:
        f = os.path.join('./' + data_folder, f)
    if not f.endswith('.mat'):
        f = f + '.mat'
    return f

def construct_command(f, o, data_folder = 'data/'):
    f = format_appro(f, data_folder)
    o = format_appro(o, data_folder)
    return "computecov '{}' '{}'".format(f, o)

In [34]:
def construct_string_commands(clist):
    string = "matlab -nodisplay -nodesktop -r \""
    for c in clist:
        string += "{}; ".format(c)
    string += "exit;\""
    return string

In [3]:
#To generate fake data
ogcwd = os.getcwd()
os.chdir('./matlab/data')
for i in range(2):
    mdict = {'S': np.eye(3) + np.random.random((3,3)) * 0.1}
    scipy.io.savemat('testS{}.mat'.format(i), mdict)
os.chdir(ogcwd)

In [4]:
c1 = construct_command('testS0.mat', 'testOut0.mat')
c2 = construct_command('testS1.mat', 'testOut1.mat')

In [5]:
string = "matlab -nodisplay -nodesktop -r \"{}; {}; exit;\"".format(c1, c2)

In [6]:
string

'matlab -nodisplay -nodesktop -r "computecov \'./data/testS0.mat\' \'./data/testOut0.mat\'; computecov \'./data/testS1.mat\' \'./data/testOut1.mat\'; exit;"'

In [35]:
construct_string_commands([c1,c2])

'matlab -nodisplay -nodesktop -r "computecov \'./data/testS0.mat\' \'./data/testOut0.mat\'; computecov \'./data/testS1.mat\' \'./data/testOut1.mat\'; exit;"'

In [8]:
ogcwd = os.getcwd()
os.chdir('./matlab')
os.system(string)
os.chdir(ogcwd)

In [27]:
for i in range(2):
    fname = format_appro('testOut{}'.format(i), data_folder = 'matlab/data')
    Sigma = scipy.io.loadmat(fname)['Sigma']
    print(Sigma)

[[1.02009569 0.05214497 0.01695453]
 [0.05214497 1.06811428 0.04147603]
 [0.01695453 0.04147603 1.0043045 ]]
[[1.09284461 0.08821154 0.01536312]
 [0.08821154 1.09078149 0.06647691]
 [0.01536312 0.06647691 1.0078532 ]]


In [29]:
ret = pd.read_csv('data/ret.csv', header = None).values 
#ret.shape = (10344, 3251), (day, stock)
univ = pd.read_csv('data/topMV95.csv', header = None).values 
univ -= 1 #because Matlab is 1 indexed
#univ.shape = (360, 1000), (OOS month, sorted list of stocks to consider for that period)
dates = pd.read_csv('data/mydatestr.txt', header = None, parse_dates = [0]) 
#dates.shape = (10344, 1), date for each day in return (not a numpy array, but a dataframe with DT objects)
tradeidx = pd.read_csv('data/investDateIdx.csv', header = None).values 
tradeidx -= 1 #because Matlab is 1 indexed!
#tradeidx.shape = (360, 1), (row of univ -> index in ret matrix)
ret[ret == -500] = np.nan
ret = ret / 100 #ret is in percent
ret_nonan = ret.copy()
ret_nonan[np.isnan(ret)] = 0
#ret[np.isnan(ret)] = 0 #sometimes we're missing OOS returns
meta_info = [ret, ret_nonan, univ, tradeidx]

In [30]:
#Populates with real data
ogcwd = os.getcwd()
os.chdir('./matlab/data')


N = 200 #Number of stocks
T = 1260 #lookback (units of days) --> should be in months though

for h in range(len(univ)):#len(univ)):
    universe = univ[h,:N]
    today = tradeidx[h][0]
    date = dates.iloc[today].values[0]
    pastPeriod_startdate = dates.iloc[today-T].values[0]
    #print("Past period start date", pastPeriod_startdate, "Period start date", date)

    pastPeriod = range(today-T, today)            
    pastRet = ret[pastPeriod][:, universe]
    S = np.cov(pastRet.T) #sample covariance
    mdict = {'S': S}
    scipy.io.savemat('N_{}_T_{}_unividx_{}.mat'.format(N, T, h), mdict)
os.chdir(ogcwd)

In [36]:
all_commands = []
for h in range(len(univ)):
    c = construct_command('N_{}_T_{}_unividx_{}.mat'.format(N, T, h), 'out_N_{}_T_{}_unividx_{}.mat'.format(N, T, h))
    all_commands.append(c)

In [37]:
print(all_commands)

["computecov './data/N_200_T_1260_unividx_0.mat' './data/out_N_200_T_1260_unividx_0.mat'", "computecov './data/N_200_T_1260_unividx_1.mat' './data/out_N_200_T_1260_unividx_1.mat'", "computecov './data/N_200_T_1260_unividx_2.mat' './data/out_N_200_T_1260_unividx_2.mat'", "computecov './data/N_200_T_1260_unividx_3.mat' './data/out_N_200_T_1260_unividx_3.mat'", "computecov './data/N_200_T_1260_unividx_4.mat' './data/out_N_200_T_1260_unividx_4.mat'", "computecov './data/N_200_T_1260_unividx_5.mat' './data/out_N_200_T_1260_unividx_5.mat'", "computecov './data/N_200_T_1260_unividx_6.mat' './data/out_N_200_T_1260_unividx_6.mat'", "computecov './data/N_200_T_1260_unividx_7.mat' './data/out_N_200_T_1260_unividx_7.mat'", "computecov './data/N_200_T_1260_unividx_8.mat' './data/out_N_200_T_1260_unividx_8.mat'", "computecov './data/N_200_T_1260_unividx_9.mat' './data/out_N_200_T_1260_unividx_9.mat'", "computecov './data/N_200_T_1260_unividx_10.mat' './data/out_N_200_T_1260_unividx_10.mat'", "comput

In [45]:
string = construct_string_commands(all_commands)

In [None]:
start = time.time()
ogcwd = os.getcwd()
os.chdir('./matlab')
os.system(string)
os.chdir(ogcwd)
print(time.time() - start)

In [None]:
def retConstShare(retMat, w):
    n, p = retMat.shape
    if len(w.shape) == 1:
        w = np.expand_dims(w, 1)
    assert(w.shape == (p,1))
    wSum1 = w/np.sum(w)

    totalRetMat = 1 + retMat

    cummProdd = np.cumprod(totalRetMat, axis = 0)
    navVec = np.matmul(cummProdd, wSum1)

    wEnd = cummProdd[n-1, :]
    wEnd = np.dot(wEnd, w) #since w is (p,1) but wEnd is (1,p)
    wEnd = wEnd/np.sum(wEnd)
    wEnd = wEnd.T

    navVecTot = np.concatenate((np.ones((1,1)), navVec[:(n-1),]))

    totalRetVec = np.divide(navVec, navVecTot)

    retVec = totalRetVec - 1
    retVec = retVec * np.sum(w)

    return np.sum(retVec) #sum of all of the returns

def get_OOS_rets(meta_info):
    ret, _, univ, tradeidx = meta_info
    rets = []
    opt_rets = []
    N = 200 #Number of stocks
    T = 1260 #lookback (units of days) --> should be in months though
    P = 1 #lookahead (units of months, since we multiply by 20)
    
    for h in range(len(univ)):#len(univ)):
        universe = univ[h,:N]
        today = tradeidx[h][0]
        cov = scipy.io.loadmat(format_appro('out_N_{}_T_{}_unividx_{}.mat'.format(N, T, h), 'matlab/data'))
        cov = cov['Sigma']
        w = optimal_weights(cov)
        investPeriod = range(today, today + P*21)
        outRet = ret[investPeriod][:, universe]
        curret = retConstShare(outRet, w)
        rets.append(curret)
    return rets

def print_normalize(a):
    print(np.mean(a) * 252 / 20, np.std(a) * np.sqrt(252 / 20))

In [None]:
MTP2_rets = get_OOS_rets(meta_info)
print_normalize(MTP2_rets)