# Testbench

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.covariance import EmpiricalCovariance, LedoitWolf
from warnings import warn

import covariance_DRO

sns.set(rc={'figure.figsize':(8,4)})

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('data/48_Industry_Portfolios.CSV', header=6, nrows=1132)

In [3]:
df['year'] = (df['Unnamed: 0']/100).astype(int)
df['month'] = (df['Unnamed: 0']%100).astype(int)

In [4]:
train_start = 1990
train_end = 2010
test_end = 2020

In [5]:
train = df.loc[(df.year>=train_start) & (df.year<train_end)]
test = df.loc[(df.year>=train_end) & (df.year<test_end)]
columns = [c for c in train.columns if c not in ['Unnamed: 0', 'year', 'month']]
train_X = train[columns].to_numpy()
test_X = test[columns].to_numpy()

In [6]:
def portfolio_tester(train, test, cov_estimator, verbose=False):
    # estimate covariance matrix and asset allocation
    cov = cov_estimator(train)
    ones = np.ones((cov.shape[0],1))
    w = (cov@ones)/(ones.T@cov@ones)
    # calculate portfolio returns and metrics
    portfolio_returns = test@w
    std = portfolio_returns.std()
    mean = portfolio_returns.mean()
    sharpe = mean/std
    if verbose:
        print('Mean: {}'.format(mean))
        print('Std: {}'.format(std))
        print('Sharpe: {}'.format(sharpe))
    return mean, std, sharpe

# Defining Leave-1-Out Covariance Estimator

In [7]:
def l1o_cv(X, method, epsilon_candidates):
    best_epsilon, best_std = None, float('inf')
    for e in epsilon_candidates:
        returns = []
        for i in range(X.shape[0]):
            # train-validation split
            val = np.expand_dims(X[i], 0)
            train = X[np.arange(0, X.shape[0], 1) != i]
            # find the weight
            cov = covariance_DRO.estimate_cov(EmpiricalCovariance(assume_centered=False).fit(train).covariance_, e, method)
            ones = np.ones((cov.shape[0],1))
            w = (cov@ones)/(ones.T@cov@ones)
            # calculate portfolio returns and metrics
            returns.append(val@w)
        # keep track of best epsilon
        if np.std(returns) < best_std:
            best_std = np.std(returns)
            best_epsilon = e
    
    # warn if best epsilon is on border
    if best_epsilon==epsilon_candidates[0]:
        warn('Epsilon on left side of interval')
    if best_epsilon==epsilon_candidates[-1]:
        warn('Epsilon on right side of interval')
    # return estimated covariance matrix
    return covariance_DRO.estimate_cov(EmpiricalCovariance(assume_centered=False).fit(X).covariance_, best_epsilon, method)

def linear_shrink(X, epsilon_candidates):
    best_epsilon, best_std = None, float('inf')
    for e in epsilon_candidates:
        returns = []
        for i in range(X.shape[0]):
            # train-validation split
            val = np.expand_dims(X[i], 0)
            train = X[np.arange(0, X.shape[0], 1) != i]
            # find the weight
            emp_cov = EmpiricalCovariance(assume_centered=False).fit(train).covariance_
            mu = np.sum(np.trace(emp_cov)) / emp_cov.shape[0]
            cov = (1. - e) * emp_cov
            cov.flat[::emp_cov.shape[0] + 1] += e * mu
            
            ones = np.ones((cov.shape[0],1))
            w = (cov@ones)/(ones.T@cov@ones)
            # calculate portfolio returns and metrics
            returns.append(val@w)
        # keep track of best epsilon
        if np.std(returns) < best_std:
            best_std = np.std(returns)
            best_epsilon = e
    
    # warn if best epsilon is on border
    if best_epsilon==epsilon_candidates[0]:
        warn('Epsilon on left side of interval')
    if best_epsilon==epsilon_candidates[-1]:
        warn('Epsilon on right side of interval')
    # return estimated covariance matrix
    emp_cov = EmpiricalCovariance(assume_centered=False).fit(X).covariance_
    mu = np.sum(np.trace(emp_cov)) / emp_cov.shape[0]
    cov = (1. - best_epsilon) * emp_cov
    cov.flat[::emp_cov.shape[0] + 1] += best_epsilon * mu
    return cov

# Testing Performance

In [8]:
print('\033[1mEmpirical Covariance:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: EmpiricalCovariance(assume_centered=False).fit(X).covariance_, verbose=True)
print('\n\033[1mLinear Shrikage:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: linear_shrink(X, np.logspace(-5,0,50)), verbose=True)
print('\n\033[1mLedoit-Wolf:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: LedoitWolf(assume_centered=False).fit(X).covariance_, verbose=True)

[1mEmpirical Covariance:[0m
Mean: 1.0657975244492974
Std: 4.359360700980689
Sharpe: 0.2444848218706779

[1mLinear Shrikage:[0m
Mean: 1.060513888888889
Std: 4.082322950563175
Sharpe: 0.2597819677011556

[1mLedoit-Wolf:[0m
Mean: 1.0657857637134516
Std: 4.358729834313328
Sharpe: 0.24451750951005086


In [9]:
print('\033[1mWasserstein:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: l1o_cv(X, 'Wasserstein', np.logspace(-5,4,50)), verbose=True)
print('\n\033[1mKullback-Leibler:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: l1o_cv(X, 'KLdirect', np.logspace(-5,2,50)), verbose=True)
print('\n\033[1mFisher-Rao:\033[0m')
mean, std, sharpe = portfolio_tester(train_X, test_X, lambda X: l1o_cv(X, 'Fisher-Rao', np.logspace(-5,2,50)), verbose=True)

[1mWasserstein:[0m
Mean: 1.0647374220729386
Std: 4.255862242923764
Sharpe: 0.25018136426837617

[1mKullback-Leibler:[0m
Mean: 1.0605529779581753
Std: 4.082658918249594
Sharpe: 0.2597701642959874

[1mFisher-Rao:[0m
Mean: 1.0617239756343908
Std: 4.116595704640032
Sharpe: 0.25791310388767735
