In [2]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import scipy.linalg
import linear_shrinkage
import Util
from Util import evaluate_curret, get_invest_period
from os.path import join
from generating_MTP import generate_mat as generate_MTP2
import os
import time
import sklearn.covariance
import pickle
from collections import namedtuple, defaultdict
from subprocess import Popen
from multiprocessing import Pool

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Load the dataset via Util.load_data(). The tuple is kept whole because the helper
# functions below take it as a single argument, and it is also unpacked into
# module-level names for direct use in later cells.
util_loaded_data = Util.load_data()
ret, ret_nonan, univ, tradeidx, dates = util_loaded_data

## Saving cached inputs to pastRets (pastRet and KTcov) and outRets

In [4]:
# The save helpers write into these two directories; stop early if either is missing.
assert all(os.path.exists(cache_dir) for cache_dir in ('./pastRets', './outRets'))

In [5]:
def save_past_information(T, N, util_loaded_data, save_KT = True):
    """Cache the past-return window (and optionally its Kendall covariance) per period.

    For every index ``h`` in ``range(len(univ))`` the result of
    ``Util.get_past_period(h, T, N, univ, tradeidx, ret)`` is pickled to
    ``pastRets/{T}_{N}_{h}_pastRet.pkl``. When ``save_KT`` is true,
    ``Util.kendall_cov`` of that window is additionally pickled to
    ``pastRets/{T}_{N}_{h}_KTcov.pkl``. After the loop the collected lists are
    pickled to ``pastRets/{T}_{N}_pastRets.pkl`` and, if requested,
    ``pastRets/{T}_{N}_KTcovs.pkl``.

    Parameters
    ----------
    T, N
        Forwarded to ``Util.get_past_period`` and embedded in the file names
        (presumably window length and number of assets -- confirm against Util).
    util_loaded_data : tuple
        The 5-tuple ``(ret, ret_nonan, univ, tradeidx, dates)``.
    save_KT : bool, default True
        Whether to also compute and store the Kendall covariance per period.

    Returns
    -------
    None
        Results are only written to disk.
    """
    ret, ret_nonan, univ, tradeidx, dates = util_loaded_data
    pastRets = []
    KT_covs = []

    for h in range(len(univ)):
        # Progress indicator: period indices are printed on one line.
        print(h, end = ' ')

        pastRet = Util.get_past_period(h, T, N, univ, tradeidx, ret)
        pastRetFileName = "pastRets/{}_{}_{}_pastRet.pkl".format(T, N, h)
        with open(pastRetFileName, 'wb') as f:
            pickle.dump(pastRet, f)
        pastRets.append(pastRet)

        if save_KT:
            KT_cov = Util.kendall_cov(pastRet)
            KT_cov_file_name =  "pastRets/{}_{}_{}_KTcov.pkl".format(T, N, h)
            with open(KT_cov_file_name, 'wb') as f:
                pickle.dump(KT_cov, f)
            KT_covs.append(KT_cov)

    # Consolidated files: one list entry per period, in period order.
    with open("pastRets/{}_{}_pastRets.pkl".format(T,N), 'wb') as f:
        pickle.dump(pastRets, f)

    if save_KT:
        with open("pastRets/{}_{}_KTcovs.pkl".format(T,N), 'wb') as f:
            pickle.dump(KT_covs, f)

In [6]:
#for old purposes
# for T in [25, 50, 100, 200, 500, 1260]:
#     for N in [25, 50, 100, 200, 500, 1000]:
#         save_past_information(T, N, util_loaded_data, save_KT = False)

In [7]:
#for one-off
# save_past_information(800,200, util_loaded_data, save_KT=False)

In [8]:
#for saving KT covs in parallel
def save_KT_cov(h):
    """Compute the Kendall covariance for period ``h`` and pickle it.

    ``T``, ``N`` and ``util_loaded_data`` are read from module globals, which
    keeps the signature down to the single period index. The result goes to
    ``pastRets/{T}_{N}_{h}_KTcov.pkl``; the period index and the elapsed
    wall-clock seconds are printed.
    """
    out_path = "pastRets/{}_{}_{}_KTcov.pkl".format(T, N, h)
    print(h)
    if h == 0:
        # Announce the configuration once, on the first period only.
        print("N={}, T={}".format(N,T))
    t_begin = time.time()
    ret, _ret_nonan, univ, tradeidx, _dates = util_loaded_data
    window = Util.get_past_period(h, T, N, univ, tradeidx, ret)
    kendall = Util.kendall_cov(window)
    with open(out_path, 'wb') as sink:
        pickle.dump(kendall, sink)
    t_end = time.time()
    print(t_end-t_begin)

def consolidate_KT_cov(N, T, n_periods=360):
    """Merge the per-period Kendall covariance pickles into a single list file.

    Reads ``pastRets/{T}_{N}_{h}_KTcov.pkl`` for ``h`` in ``range(n_periods)``
    and pickles the list of loaded objects, in period order, to
    ``pastRets/{T}_{N}_KTcovs.pkl``.

    Parameters
    ----------
    N, T
        Values embedded in the file names (note the files are named T first).
    n_periods : int, default 360
        Number of per-period files to merge. The default keeps the previously
        hard-coded count.

    Raises
    ------
    FileNotFoundError
        If any of the per-period files is missing.
    """
    KT_covs = []
    for h in range(n_periods):
        fname = "pastRets/{}_{}_{}_KTcov.pkl".format(T, N, h)
        # NOTE: pickle.load executes arbitrary code; only load files this notebook produced.
        with open(fname, 'rb') as f:
            KT_covs.append(pickle.load(f))
    with open("pastRets/{}_{}_KTcovs.pkl".format(T,N), 'wb') as f:
        pickle.dump(KT_covs, f)

# Compute the per-period Kendall covariances in parallel, then merge them into one file.
# The list of (N, T) pairs is empty, so this cell does nothing as written.
for N, T in []:
    # The context manager shuts the worker processes down once the blocking map has
    # returned, so successive (N, T) pairs do not accumulate idle pools.
    with Pool(8) as pool:
        pool.map(save_KT_cov,range(360))
    consolidate_KT_cov(N,T)

In [9]:
def save_outrets(N, util_loaded_data, P=1):
    """Pickle the investment-period returns of all 360 periods into one file.

    Calls ``get_invest_period(h, P, N, univ, tradeidx, ret)`` for each ``h`` in
    ``range(360)`` and writes the resulting list, in period order, to
    ``outRets/{N}_outRets.pkl``.

    ``util_loaded_data`` is the 5-tuple ``(ret, ret_nonan, univ, tradeidx, dates)``.
    """
    ret, _ret_nonan, univ, tradeidx, _dates = util_loaded_data
    all_outrets = [get_invest_period(period, P, N, univ, tradeidx, ret)
                   for period in range(360)]
    with open('outRets/{}_outRets.pkl'.format(N), 'wb') as sink:
        pickle.dump(all_outrets, sink)

# Regenerate the outRets files for each N in the list. The list is empty, so nothing
# runs as written; the trailing comment presumably keeps the values used previously.
for N in []:#[25, 50, 100, 200, 500, 1000]:
    save_outrets(N, util_loaded_data)

## After this point are the algorithms and experiments; everything above only generates data

In [10]:
from estimators import (MTP2_wrapper, MTP2_cov_wrapper,
                        LRPS_wrapper,
                        CLIME_wrapper, CLIME_cov_wrapper,
                        old_LS_wrapper, LS_wrapper, NLS_wrapper,
                        POET_wrapper, POET_5_wrapper,
                        glasso_wrapper,
                        get_AFM_estimator)

In [11]:
# Registry of the estimators to run: maps a method name to its callable.
# The key text matters: later cells branch on substrings of the name
# ('cov', 'AFM', 'glasso', 'LS', 'CLIME') and embed it in output file names.
# Commented-out entries are methods that are currently disabled.
all_methods = { #all time estimates for N = 100
    'glasso': glasso_wrapper, #2 seconds
    #'MTP2': MTP2_wrapper, #25 seconds
    #'MTP2_cov': MTP2_cov_wrapper,
    'CLIME': CLIME_wrapper,
    #'CLIME_cov': CLIME_cov_wrapper
    #'LS': LS_wrapper, #0.5 seconds
    #'old_LS_cov': old_LS_cov_wrapper,
    'old_LS': old_LS_wrapper,
    #'NLS':  NLS_wrapper, #5 seconds
    #'LRPS': LRPS_wrapper, #7 seconds
    #'AFM_NL': get_AFM_estimator(5, 'NLS', tradeidx), #4 seconds
    'AFM_LS': get_AFM_estimator(5, 'LS', tradeidx), #0.5 seconds
    'POET': POET_wrapper,
    'POET_5': POET_5_wrapper,
    #'AFM_POET': None,
    #'equiweight': None,
    #'POET': POET_wrapper
}

### Time estimate

In [13]:
# Time each enabled method once, on the first period of a single (T, N) configuration.
T = 200
N = 200
with open('pastRets/{}_{}_pastRets.pkl'.format(T,N), 'rb') as f:
    pastRets = pickle.load(f)

# Only period 0 is timed; it is the same for every method, so it is set once.
h = 0
timing_dict = {}
for method_name, m_func in all_methods.items():
    assert 'cov' not in method_name, 'Timing is not supported for methods with cov'
    if 'glasso' in method_name:
        continue
    print(method_name, end = ' ')
    # AFM estimators are called with the period index as an extra positional argument.
    args = [h] if 'AFM' in method_name else []
    # perf_counter is monotonic and high-resolution, so the measured duration cannot be
    # distorted by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    cov = m_func(pastRets[h], *args)
    end = time.perf_counter()
    print(end-start)
    timing_dict[method_name] = end-start

CLIME b'[1] "Loaded X"\n'
b'[1] 0.1627624\n'
b'$`clime(X, lambda = lambda_opt, sigma = FALSE, standardize = FALSE, perturb =`\n'
b'<environment: 0x78d26b0>\n'
b'\n'
b'$`solve(Sigma)`\n'
b'<environment: 0x78d1760>\n'
b'\n'
b'$`solve.default(Sigma)`\n'
b'<environment: 0x78d1530>\n'
b'\n'
b'attr(,"error.message")\n'
b'[1] "Error in solve.default(Sigma) : \\n  system is computationally singular: reciprocal condition number = 2.82808e-19\\nCalls: clime -> solve -> solve.default\\n"\n'
b'attr(,"class")\n'
b'[1] "dump.frames"\n'
b''
0.9377560615539551
old_LS 0.1506514549255371
AFM_LS 0.183488130569458
POET b'[1] "Loaded X"\n'
b'[1] "Done with POET"\n'
b''
2.017026901245117
POET_5 b'[1] "Loaded X"\n'
b'[1] "Done with POET"\n'
b''
2.0004496574401855


In [14]:
print(timing_dict.items())
# The stored values are elapsed seconds for one call per method, so the sum is in
# seconds, not hours.
print("Total seconds: ", sum(timing_dict.values()))

dict_items([('CLIME', 0.9377560615539551), ('old_LS', 0.1506514549255371), ('AFM_LS', 0.183488130569458), ('POET', 2.017026901245117), ('POET_5', 2.0004496574401855)])
Total hours:  5.289372205734253


### Run versions

In [15]:
# Estimates are written under ./pickle and run metadata under ./run_info; both must exist.
assert all(os.path.exists(required_dir) for required_dir in ("./pickle", './run_info'))

In [16]:
def write_fail_log(method_name, T, N, h, KT_or_not, run_name):
    """Append one failure record to ``run_info/fail_log_{run_name}.txt``.

    The file is created with a ``FAIL LOG`` header line on first use. Each
    record is the ``str`` of the tuple ``(method_name, T, N, h, KT_or_not)``
    on its own line, so that successive failures do not run together.

    Raises
    ------
    FileNotFoundError
        If the ``run_info`` directory does not exist.
    """
    fname = "run_info/fail_log_{}.txt".format(run_name)
    if not os.path.exists(fname):
        with open(fname, 'w') as f:
            f.write("FAIL LOG\n")
    with open(fname, 'a') as f:
        # Terminate every record with a newline; without it all entries end up on one line.
        f.write(str((method_name, T, N, h, KT_or_not)) + "\n")

In [None]:
# Run configuration: module-level settings read by the cells that follow.
# When true, estimates whose output pickle already exists are not recomputed.
skip_existing = True
# Tag embedded in the run-info, fail-log and estimate file names.
run_name = "first_server_run"
# Recorded in the run info and embedded in the estimate file names.
KT_or_not = False
# (N, T) pairs to process, in this order.
N_T_list = [(100,50),(100,100),(100,200),(100,400),(100,1260),
            (200,100),(200,200),(200,400), (200,800), (200, 1260),
            (500,250), (500,500), (500,1000), (500,1260)]

# Echo the configuration so it is visible in the cell output before anything runs.
print("Running with run_name={} and KT_or_not={}".format(run_name, KT_or_not))
print("All methods are: " + str(all_methods.keys()))
print("N, T list is: {}".format(N_T_list))
if skip_existing:
    print("SKIP EXISTING IS TRUE!")

# Optionally pickle a description of this run to run_info/{run_name}.info.
if input("Save information?") == "Y":

    run_info_name = "run_info/{}.info".format(run_name)
    run_info = dict(
        run_name=run_name,
        KT_or_not=KT_or_not,
        N_T_list=N_T_list,
        all_methods_keys=list(all_methods.keys()),
    )

    # The second prompt is only shown when a run-info file is already present.
    append_requested = (os.path.exists(run_info_name)
                        and input("Run info already exists, want to append?") == 'Y')
    if append_requested:
        with open(run_info_name, 'rb') as f:
            old_run_info = pickle.load(f)
        # Merge the stored and current entries, dropping duplicates; the stored
        # run_name and KT_or_not are kept as they were.
        old_run_info['N_T_list'] = list(set(old_run_info['N_T_list'] + N_T_list))
        all_methods_keys_list = list(all_methods.keys())
        old_run_info['all_methods_keys'] = list(set(old_run_info['all_methods_keys'] + all_methods_keys_list))
        run_info = old_run_info

    # Without the append answer, an existing file is overwritten with the fresh description.
    with open(run_info_name, 'wb') as f:
        pickle.dump(run_info, f)

# Make sure the cached inputs exist for every configuration before the experiments start.
for N, T in N_T_list:
    # Past-return windows are cached per (T, N); the Kendall covariances are not generated here.
    if not os.path.exists('pastRets/{}_{}_pastRets.pkl'.format(T,N)):
        print("pastRets doesn't exist for N,T={},{}".format(N,T))
        save_past_information(T,N, util_loaded_data, save_KT=False)
    # The outRets file name depends on N only, so it is shared across all T for that N.
    if not os.path.exists('outRets/{}_outRets.pkl'.format(N)):
        print("outRets doesn't exist for N={}".format(N))
        save_outrets(N, util_loaded_data, P=1)
    
# Main experiment loop: for every (N, T) pair, load the cached inputs and compute the
# covariance estimate of every enabled method for all 360 periods in parallel.
for N, T in N_T_list:
    with open('pastRets/{}_{}_pastRets.pkl'.format(T,N), 'rb') as f:
        pastRets = pickle.load(f)
        assert len(pastRets) == 360

    # Kendall covariances are optional; they are only indexed by methods with 'cov' in the name.
    if os.path.isfile('pastRets/{}_{}_KTcovs.pkl'.format(T,N)):
        with open('pastRets/{}_{}_KTcovs.pkl'.format(T,N), 'rb') as f:
            pastCovs = pickle.load(f)
    else:
        print("pastCovs doesn't exist for T={} N={}".format(T,N))
        pastCovs = []

    with open('outRets/{}_outRets.pkl'.format(N), 'rb') as f:
        outRets = pickle.load(f)
        assert len(outRets) == 360

    print("Loaded all relevant information")

    def get_covs_all_methods(h):
        """Compute and pickle the estimate of every method in ``all_methods`` for period ``h``.

        Reads ``T``, ``N``, ``pastRets``, ``pastCovs`` and the run settings from module
        globals. Each result is written to
        ``pickle/{T}_{N}_{h}_{method}_{KT_or_not}_{run_name}_covEst.pkl``. When a method
        fed from ``pastRets`` raises, the failure is logged and the string ``'FAIL'`` is
        stored in place of the estimate.
        """
        print("WORKING ON h={}".format(h))
        for method_name, method in all_methods.items():
            print('Starting on {}'.format(method_name))
            fname = 'pickle/{}_{}_{}_{}_{}_{}_covEst.pkl'.format(T, N, h, method_name, KT_or_not, run_name)
            if skip_existing and os.path.exists(fname):
                continue
            #USING PASTCOVS
            if 'cov' in method_name:
                args = []
                if 'LS' in method_name or 'CLIME' in method_name:
                    #args is number of samples
                    args = [T]
                cov = method(cov=pastCovs[h], *args)
            else:
            #USING PASTRETS
                args = []
                if 'AFM' in method_name:
                    # AFM estimators also receive the period index.
                    args = [h]
                try:
                    cov = method(pastRets[h], *args)
                except Exception:
                    # Catch estimator failures only: a bare except would also swallow
                    # KeyboardInterrupt/SystemExit and make the run impossible to stop.
                    write_fail_log(method_name, T, N, h, KT_or_not, run_name)
                    cov = 'FAIL'

            with open(fname, 'wb') as f:
                pickle.dump(cov, f)
    
    # A fresh pool per (N, T) pair; the context manager releases its worker processes
    # after the blocking map returns instead of leaving one idle pool per pair.
    with Pool(15) as pool:
        pool.map(get_covs_all_methods, range(360))

Running with run_name=first_server_run and KT_or_not=False
All methods are: dict_keys(['glasso', 'CLIME', 'old_LS', 'AFM_LS', 'POET', 'POET_5'])
N, T list is: [(100, 50), (100, 100), (100, 200), (100, 400), (100, 1260), (200, 100), (200, 200), (200, 400), (200, 800), (200, 1260), (500, 250), (500, 500), (500, 1000), (500, 1260)]
SKIP EXISTING IS TRUE!
Save information?Y
Run info already exists, want to append?N
pastCovs doesn't exist for T=50 N=100
Loaded all relevant information
WORKING ON h=0
WORKING ON h=12
WORKING ON h=6
WORKING ON h=18
WORKING ON h=24
WORKING ON h=36
WORKING ON h=42
WORKING ON h=48
WORKING ON h=30
Starting on glasso
Starting on glasso
Starting on glasso
WORKING ON h=60
WORKING ON h=66
WORKING ON h=72
WORKING ON h=54
WORKING ON h=78
Starting on glasso
Starting on glasso
WORKING ON h=84
Starting on glasso
Starting on glasso
Starting on glasso
Starting on CLIME
Starting on glasso
Starting on CLIME
Starting on CLIME
Starting on glasso
Starting on glasso
Starting on gl

  * coefs)
  * coefs)
