# Empirical Estimator

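This notebook uses historical returns to estimate the assets' expected returns and covariance matrix. It compares a maximum Sharpe ratio portfolio built from an `EmpiricalPrior` using `ShrunkMu` and `DenoiseCovariance` against a benchmark built with the default moment estimators.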

## Data

In [1]:
from plotly.io import show
from sklearn.model_selection import train_test_split

from skfolio import Population, RiskMeasure
from skfolio.datasets import load_sp500_dataset
from skfolio.moments import DenoiseCovariance, ShrunkMu
from skfolio.optimization import MeanRisk, ObjectiveFunction
from skfolio.preprocessing import prices_to_returns
from skfolio.prior import EmpiricalPrior

# Load the bundled S&P 500 price dataset and convert prices to returns.
prices = load_sp500_dataset()
X = prices_to_returns(prices)

# shuffle=False keeps the rows in their original (date) order, so the last
# 33% of observations become the test set.
X_train, X_test = train_test_split(
    X,
    test_size=0.33,
    shuffle=False,
)

In [2]:
# Preview the first five rows of the training returns.
X_train.head(n=5)

Unnamed: 0_level_0,AAPL,AMD,BAC,BBY,CVX,GE,HD,JNJ,JPM,KO,LLY,MRK,MSFT,PEP,PFE,PG,RRC,UNH,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1990-01-03,0.007576,-0.030303,0.008045,0.118056,-0.016229,-0.001876,0.003581,0.004072,0.033589,-0.014318,0.000000,0.015896,0.005208,-0.009709,0.002938,-0.001813,0.000000,-0.019355,0.000000,-0.010079
1990-01-04,0.003759,-0.015500,-0.021355,-0.012422,-0.012831,-0.005639,0.006244,0.002028,0.003991,-0.004993,-0.005557,-0.015647,0.028497,-0.009804,0.016602,-0.019725,0.000000,-0.009868,-0.005201,-0.009933
1990-01-05,0.003745,-0.031996,-0.021821,0.000000,-0.014855,-0.009452,-0.013298,-0.010408,0.003975,-0.008212,-0.010874,-0.020641,-0.025189,-0.013991,-0.008646,-0.018004,0.000000,-0.043189,-0.010732,-0.005267
1990-01-08,0.003731,0.000000,0.005633,-0.075472,0.009424,0.005725,-0.009883,0.016944,0.000000,0.021159,0.000000,0.012839,0.015504,0.018118,-0.008721,0.018334,0.000000,-0.020833,0.013630,0.015381
1990-01-09,-0.007435,0.016527,0.000000,0.000000,-0.007469,-0.020803,-0.026316,-0.031026,-0.031957,-0.007658,-0.011147,-0.007893,-0.002545,-0.013722,-0.021505,0.000000,0.000000,-0.024823,-0.026619,-0.020114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-01-30,0.012816,-0.011730,-0.030118,-0.002382,-0.005300,-0.006836,-0.002225,0.002297,-0.005375,0.000297,0.001268,0.009628,0.012988,-0.006079,0.004664,-0.016952,0.005738,0.000982,0.009707,-0.003950
2012-01-31,0.007636,-0.004451,0.008485,-0.056332,-0.002803,-0.010043,-0.008486,0.003049,0.007827,0.001017,0.012494,-0.015946,-0.002708,0.003985,-0.008386,-0.002682,-0.004143,0.014080,0.000989,-0.020481
2012-02-01,-0.000577,0.028316,0.032307,0.009638,-0.003197,0.003203,0.001782,-0.003354,0.008025,0.004743,0.001488,0.009413,0.012219,0.010804,0.006191,0.002689,0.017553,0.028391,0.013366,0.002756
2012-02-02,-0.002383,0.004348,0.012225,0.004952,0.008753,-0.001068,-0.000204,-0.001514,-0.001321,-0.000295,-0.005030,-0.004918,0.001970,0.000294,-0.009380,0.001751,-0.002564,-0.013523,-0.003857,-0.005251


## Model

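Maximize the Sharpe ratio using an `EmpiricalPrior` whose expected returns are estimated with `ShrunkMu` and whose covariance matrix is estimated with `DenoiseCovariance`.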

In [4]:
# Mean-risk model that maximizes the return/risk ratio, with variance as the
# risk measure. The prior's moments come from ShrunkMu (expected returns)
# and DenoiseCovariance (covariance matrix).
max_sharpe_model_empirical = MeanRisk(
    risk_measure=RiskMeasure.VARIANCE,
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    prior_estimator=EmpiricalPrior(
        mu_estimator=ShrunkMu(),
        covariance_estimator=DenoiseCovariance(),
    ),
    portfolio_params={"name": "Empirical - ShrunkMu & DenoiseCovariance"},
)

# Fit on the training returns, then display the fitted asset weights.
max_sharpe_model_empirical.fit(X_train)
max_sharpe_model_empirical.weights_

array([5.29957573e-02, 5.62258516e-07, 2.19431588e-07, 5.78329525e-02,
       1.05704853e-01, 6.42635887e-07, 1.25145591e-02, 1.64813030e-01,
       3.95272190e-07, 8.40687639e-02, 1.20296728e-06, 1.33374541e-06,
       6.51482298e-02, 7.44911613e-02, 7.02327402e-06, 1.27177410e-01,
       3.87852705e-02, 6.81199403e-02, 4.34872838e-02, 1.04849409e-01])

Benchmark: maximize the Sharpe ratio with the default moment estimators, i.e. leaving `prior_estimator` unset so that `MeanRisk` falls back to its default prior.

In [5]:
# Same objective and risk measure as the empirical model, but no
# prior_estimator is passed, so MeanRisk uses its default.
benchmark_model = MeanRisk(
    risk_measure=RiskMeasure.VARIANCE,
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    portfolio_params={"name": "Benchmark - Moments Estimators"},
)

# Fit on the training returns, then display the fitted asset weights.
benchmark_model.fit(X_train)
benchmark_model.weights_

array([9.43631399e-02, 1.13184579e-06, 5.04970598e-07, 1.20834667e-01,
       3.18126275e-02, 8.57806907e-07, 7.11596802e-04, 1.24104939e-01,
       9.49223801e-07, 2.77547553e-02, 1.23409042e-06, 1.37593860e-06,
       1.16299875e-01, 5.73516411e-02, 9.58498590e-06, 1.09493919e-01,
       8.64761638e-02, 1.83992252e-01, 1.32350165e-02, 3.35537683e-02])

## Prediction

In [6]:
# Apply each fitted model to the held-out returns, in the same order as the
# names on the left-hand side.
pred_empirical, pred_benchmark = (
    fitted_model.predict(X_test)
    for fitted_model in (max_sharpe_model_empirical, benchmark_model)
)

## Analysis

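Compare the out-of-sample cumulative returns of the two portfolios on the test set. In this experiment, the naive moment estimators outperform `ShrunkMu` + `DenoiseCovariance`.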

In [8]:
# Group both test-set predictions so they can be compared together.
population = Population(
    [
        pred_empirical,
        pred_benchmark,
    ]
)

# Last expression of the cell: its return value is rendered as the cell output.
population.plot_cumulative_returns()