# Predictor

In [1]:
import pandas as pd
import numpy as np
from mosek_tools.solver import lasso as ll


def normalize(ts):
    """Rescale ``ts`` so that its values have unit 2-norm.

    Parameters
    ----------
    ts : pandas.Series or pandas.DataFrame
        Data to rescale. The norm is computed on ``ts.values`` with
        ``np.linalg.norm(..., 2)``.

    Returns
    -------
    A new object with the same index as ``ts``, equal to ``ts`` divided by
    its norm. If the norm is exactly zero (all-zero or empty Series) the
    input is returned as a float copy, because dividing by zero would turn
    every entry into NaN.
    """
    norm = np.linalg.norm(ts.values, 2)
    if norm == 0:
        # Nothing to scale: keep the zeros instead of producing 0/0 = NaN.
        return ts.astype(float)
    return ts / norm


def lasso(X, y, lamb):
    """Run the ``ll`` solver on ``X`` and ``y`` and label the result.

    The raw arrays ``X.values`` and ``y.values`` are handed to ``ll``
    together with ``lamb``; whatever it returns is wrapped in a
    ``pandas.Series`` indexed by the columns of ``X``.
    """
    coefficients = ll(X.values, y.values, lamb)
    return pd.Series(data=coefficients, index=X.columns)

In [2]:
    # Load the data table from CSV; the first column becomes the index and
    # is parsed as dates.
    data = pd.read_csv("data/data.csv", index_col=0, parse_dates=True)

    # Pick the "GS" column (presumably a price series -- confirm against the
    # CSV) and turn it into period-over-period percentage changes.
    stock = data["GS"]
    r = stock.pct_change()

    # Features: one exponentially weighted mean of the returns per
    # centre-of-mass value; each column is keyed by its `com`. With
    # min_periods=30 an entry is NaN until 30 observations are available.
    X = pd.DataFrame({a: r.ewm(com=a, min_periods=30).mean() for a in [2, 3, 5, 8, 13, 21, 34, 55, 89]})

    # Target: shift returns back by one row, as we are trying to predict the
    # next day's return from today's features. The last row has no successor
    # and is NaN until filled below.
    y = r.shift(-1)

    # Drop everything before the cut-off date and replace remaining NaNs by 0.
    # NOTE(review): "01-02-2010" is ambiguous (2 Jan vs 1 Feb) -- confirm the
    # intended date and how pandas parses it.
    X = X.truncate(before="01-02-2010").fillna(0.0)
    y = y.truncate(before="01-02-2010").fillna(0.0)

    # Scale each feature column and the target with `normalize`, then fit the
    # lasso weights with regularisation parameter 0.005.
    X = X.apply(normalize)
    y = normalize(y)
    w = lasso(X, y, 0.005)

    # Show the fitted weights and the correlation matrix between the weighted
    # sum of features and the target (computed on the same data used to fit).
    print(w)
    print(np.corrcoef((X*w).sum(axis=1), y))

2    -5.985250e-02
3     1.807480e-08
5     4.789660e-02
8     7.004795e-08
13   -2.242114e-08
21   -4.242050e-03
34   -4.630103e-03
55   -1.816950e-08
89   -7.477504e-09
dtype: float64
[[1.         0.03817378]
 [0.03817378 1.        ]]
