In [None]:
from functions import *
from parameters import *
from functions2 import *

from sklearn.linear_model import Lasso
from sklearn.linear_model import lasso_path
import torch
import torch.nn.functional as F

from statsmodels.nonparametric.smoothers_lowess import lowess

from tqdm import tqdm
from IPython.display import display, HTML

In [None]:
# --- Long-format data: a single CSV read into a data-frame, with 'date' parsed as datetimes ---
LOG( "Data (data-frame)" )
filename = "raw/data_ml.csv"
LOG( f"  Reading {filename} [20 seconds]" )
d = pd.read_csv( filename )
d['date'] = pd.to_datetime( d['date'] )

# Predictor names are the keys of `signs`; the target is the column 'R1M_Usd'
target = 'R1M_Usd'
predictors = list( signs )

# --- Same data in the pickled form (a "list of matrices", per the log message) ---
LOG( "Data (list of matrices)" )
LOG( "  Reading data/data_ml.pickle" )
dd = load( "data/data_ml.pickle" )

# Compute portfolio weights; optimize the information ratio

In [None]:
# Training sample returned by get_data_3 for DATE1: predictors x, returns y, universe mask
# (presumably the data up to DATE1 -- TODO confirm against get_data_3)
x, y, universe = get_data_3(date=DATE1, signs=signs)

# y and universe get a trailing singleton axis (shape taken from the first two
# dimensions of y); everything is then converted to float32 tensors.
column_shape = ( y.shape[0], y.shape[1], 1 )
x = torch.tensor( x, dtype=torch.float32 )
y = torch.tensor( y.reshape(column_shape), dtype=torch.float32 )
universe = torch.tensor( universe.reshape(column_shape), dtype=torch.float32 )

In [None]:
class Linear3(torch.nn.Module):
    """Linear score on the predictors, turned into long-only portfolio weights.

    The score of each entry is a linear function of its k predictors; the
    weights are proportional to exp(score) inside the universe, zero outside,
    and are normalized to sum to 1 along axis 0 (i.e., for each date).
    """

    def __init__(self,k):
        """k: number of predictors (size of the last dimension of x)."""
        super(Linear3,self).__init__()
        self.linear = torch.nn.Linear(k,1)

    def forward(self,xs):
        """Compute the portfolio weights.

        xs: pair (x, universe); x is n×l×k, universe is n×l×1 and is zero
            for the entries that must receive a zero weight.
        Returns an n×l tensor of non-negative weights; each column sums to 1
        (or is identically 0 if the universe is empty for that column).
        """
        x, universe = xs
        # x is n×l×k; the linear layer is applied on the last dimension
        scores = self.linear(x)   # n×l×1
        in_universe = universe != 0

        # exp() overflows in float32 as soon as a score exceeds ~88, which turns the
        # weights into inf/inf = NaN. The normalized weights are unchanged if we
        # subtract a constant for each date: use the largest in-universe score.
        minus_inf = torch.full_like( scores, float("-inf") )
        shift = torch.where( in_universe, scores.detach(), minus_inf ).amax( dim=0, keepdim=True )
        shift = torch.where( torch.isfinite(shift), shift, torch.zeros_like(shift) )  # empty universe: no shift
        # Outside the universe, the score is irrelevant (the weight is zero): replace it
        # with 0 so that exp() cannot overflow there either.
        shifted = torch.where( in_universe, scores - shift, torch.zeros_like(scores) )

        p = shifted.exp() * universe
        p = p[:,:,0]         #  n×l
        p = p / ( 1e-16 + p.sum(axis=0) )  # portfolio weights: positive, sum up to 1 for each date
        return p
    
# One input per predictor: dimension 2 of x indexes the k predictors
model = Linear3( x.size(2) )
# Sanity check, to run by hand: the weights should sum to 1 for each date
# model( (x,universe) ).detach().numpy().sum(axis=0)

In [None]:
## Gradient-based maximization of the information ratio (IR) of the portfolio

LOG( "[LONG] 50 minutes for 10,000 epochs" )

N = 10_000
IRs = np.full( N, np.nan )    # IR at each epoch; remains NaN for the epochs not reached
optimizer = torch.optim.Adam( model.parameters() )
pbar = tqdm( range(N) )
for t in pbar:
    # Portfolio return for each date: weighted sum of the individual ratio returns
    w = model( (x,universe) )
    ratio_returns = ( w * y[:,:,0].expm1() ).sum(axis=0)    # y already contains the forward returns
    log_returns = ratio_returns.log1p()

    # The loss is minus the IR (mean / standard deviation of the log-returns)
    IR = log_returns.mean() / log_returns.std()
    loss = -IR

    ir_value = IR.item()
    IRs[t] = ir_value
    pbar.set_description( f"IR={ir_value:.3f}" )

    # Stop as soon as the objective is NaN or infinite (-IR is finite iff IR is)
    if not np.isfinite( ir_value ):
        LOG( f"{t} PROBLEM" )
        break

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Learning curve: IR after each epoch (NaN, i.e. not drawn, for epochs not reached).
# The x axis is logarithmic, which cannot place a value of 0:
# number the epochs from 1 so that the first one is shown as well.
epochs = np.arange( 1, len(IRs) + 1 )
fig, ax = plt.subplots()
ax.plot( epochs, IRs )
ax.set_xlabel("Epoch")
ax.set_ylabel("IR")
ax.set_xscale('log')
fig.savefig("plots/model3_linear_IR_loss.pdf")
plt.show()

In [None]:
# Evaluate the fitted model on the data returned by get_data_3(all=True) and backtest
# its output as a signal.
# NOTE: this cell rebinds x, y and universe, which the training loop also reads:
# re-run the cell building the training tensors before re-running the training loop.
x, y, universe = get_data_3(all=True, signs=signs)
universe = universe.reshape( y.shape[0], y.shape[1], 1 )
y = y.reshape( y.shape[0], y.shape[1], 1 )
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)
universe = torch.tensor(universe, dtype=torch.float32)

# Inference only: no_grad() avoids recording the autograd graph for the whole sample
with torch.no_grad():
    signal = model( (x,universe) ).numpy()

# Lagged log-returns, with missing values replaced with 0 (fillna returns a new
# object: trailing_log_returns itself keeps its missing values)
trailing_log_returns = LAG( np.log1p( dd[ 'R1M_Usd' ] ) )
y = trailing_log_returns.fillna(0)

# The signal is a bare array: it takes its index and columns from the returns
assert signal.shape == y.shape, f"signal is {signal.shape}, returns are {y.shape}"
signal = pd.DataFrame( signal, index = y.index, columns = y.columns )

res = signal_backtest(signal, y, date=DATE1)

# Wealth curves from the signal backtest: the first 6 rows of res['prices'], one colour each
fig, ax = plt.subplots()
for i in range(6):
    ax.plot( res['dates'], res['prices'].iloc[i], color = quintile_colours[i] )
ax.axvline( pd.to_datetime(DATE1), color='black', linewidth=1 )
ax.set_yscale('log')
ax.set_title('Maximizing the IR (signal)')

# Out-of-sample statistics of the row at position 5, written in the top-left corner
oos_row = res['out-of-sample'].iloc[5,:]
captions = [
    ( .97, f"μ={100*oos_row['CAGR']:.1f}%" ),
    ( .90, f"σ={100*oos_row['Annualized Volatility']:.1f}%" ),
    ( .83, f"IR={oos_row['Information Ratio']:.2f}" ),
]
for y_pos, caption in captions:
    ax.text( 0.02, y_pos, caption, horizontalalignment='left', verticalalignment='top', transform = ax.transAxes )

fig.savefig("plots/model3_linear_IR_wealth.pdf")
plt.show()

res['out-of-sample']

In [None]:
# Backtest the strategy actually learned: the model output is used directly as portfolio weights

r = compute_portfolio_returns( signal, np.expm1(trailing_log_returns) )

# Log-price = cumulative log-returns.
# "cumsum" is not the exact inverse of "diff" -- it discards the first value, 1: put it back.
p = replace_last_leading_NaN_with_1( np.exp( cumsum_na(r) ) )

# Performance statistics, on the dates after DATE1 only
out_of_sample = r.index > DATE1
s = analyze_returns( r[out_of_sample], as_df = True )

# First 5 wealth curves of the signal backtest (in colour), with the wealth of the
# learned weights on top (in black)
fig, ax = plt.subplots()
for i in range(5):
    ax.plot( res['dates'], res['prices'].iloc[i], color = quintile_colours[i] )
ax.plot( p.index, p, color='black' )
ax.axvline( pd.to_datetime(DATE1), color='black', linewidth=1 )
ax.set_yscale('log')
ax.set_title('Maximizing the IR (weights)')

# Statistics from the first row of s, written in the top-left corner
summary_row = s.iloc[0,:]
captions = [
    ( .97, f"μ={100*summary_row['CAGR']:.1f}%" ),
    ( .90, f"σ={100*summary_row['Annualized Volatility']:.1f}%" ),
    ( .83, f"IR={summary_row['Information Ratio']:.2f}" ),
]
for y_pos, caption in captions:
    ax.text( 0.02, y_pos, caption, horizontalalignment='left', verticalalignment='top', transform = ax.transAxes )

fig.savefig("plots/model3_linear_IR_wealth_weights.pdf")
plt.show()

s

In [None]:
# Log that the notebook ran to completion (this is the last statement in view)
LOG( "Done." )