In [None]:
from functions import *
from functions2 import *
from parameters import *

from sklearn.linear_model import Lasso
from sklearn.linear_model import lasso_path
import torch
import torch.nn.functional as F

from statsmodels.nonparametric.smoothers_lowess import lowess

from tqdm import tqdm
from IPython.display import display, HTML

We first load the raw data, which is stored as a CSV file.

In [None]:
LOG( "Data (data-frame)" )
filename = "raw/data_ml.csv"
LOG( f"Reading {filename} [20 seconds]" )
# Read the CSV and parse the 'date' column into datetimes in one chained expression
d = pd.read_csv(filename).assign( date = lambda df: pd.to_datetime( df['date'] ) )

# Predictor names are the keys of `signs`; `target` names the variable 'R1M_Usd'
target = 'R1M_Usd'
predictors = [ *signs.keys() ]

Rather than a single data-frame, it is often easier to work with a set of matrices, one per variable (indexed by variable name),
with one row per stock and one column per date: we can then easily combine them, or apply a function to each row or to each column.

In [None]:
LOG( "Data (list of matrices)" )
# Name the path once so that the log message and the actual read cannot diverge
pickle_path = "data/data_ml.pickle"
LOG( f"Read {pickle_path}" )
# NOTE(review): `load` is a project helper; if it unpickles, only use it on trusted files
dd = load( pickle_path )

# Single signals
For any of the input variables, we can divide the universe into quintiles, go long the top quintile, and short the bottom quintile.

In [None]:
LOG( "Backtest a single signal" )
# Log-returns of 'R1M_Usd', passed through the project helper LAG
# (presumably shifts them by one period so they become trailing returns -- TODO confirm)
trailing_log_returns = LAG( np.log1p( dd[ 'R1M_Usd' ] ) )
# Missing returns are replaced with 0; `trailing_log_returns` itself is left untouched
y = trailing_log_returns.fillna(0)

for signal in predictors: 
    LOG( f"  {signal}" )
    # Replace missing values with the median (0.5, since the predictors are uniform)
    # *before* applying the sign: once a predictor has been multiplied by a negative
    # sign its values are negative, and 0.5 would then sit above every real
    # observation instead of in the middle.
    x = dd[signal].fillna(.5) * signs[signal]
    x = np.where( dd['universe'], 1, np.nan ) * x   # Only invest in stocks in the universe
    r = signal_backtest(x, y, date = DATE1)

    # Row 5 of the performance table is the one shown on the plot
    # (presumably the long-short portfolio, after the 5 quintiles -- TODO confirm)
    summary = r['performance'].iloc[5,:]
    annotations = [
        f"μ={100*summary['CAGR']:.1f}%",
        f"σ={100*summary['Annualized Volatility']:.1f}%",
        f"IR={summary['Information Ratio']:.2f}",
    ]

    fig, ax = plt.subplots()
    for i in range(6):
        ax.plot( r['dates'], r['prices'].iloc[i,:], color = quintile_colours[i] )
    ax.set_yscale('log')
    ax.set_title(signal if signs[signal]>=0 else f"- {signal}")
    # Stack the annotations in the top-left corner, in axes coordinates
    for text_height, label in zip( (.97, .90, .83), annotations ):
        ax.text(0.02, text_height, label, horizontalalignment='left', verticalalignment='top', transform = ax.transAxes)
    fig.savefig(f'plots/signal_wealth_{signal}.pdf')
    plt.show()
    plt.close(fig)   # One figure per predictor: release it so that figures do not accumulate

    display(r['performance'])
    
LOG("Done.")