In [4]:
import pandas as pd
import numpy as np
from mosekTools.solver import lasso as ll


def normalize(ts):
    """Rescale ``ts`` by the 2-norm of its underlying values.

    Parameters
    ----------
    ts : pandas object exposing ``.values``
        In this notebook it receives a Series: the columns of ``X`` (via
        ``DataFrame.apply``) and the target ``y``. For such 1-D input the
        2-norm is the Euclidean length, so the result has unit length.

    Returns
    -------
    Same type as ``ts``, with the index preserved and every value divided
    by the norm.

    Notes
    -----
    There is no guard against a zero norm; the division then follows the
    usual numpy/pandas floating-point rules.
    """
    two_norm = np.linalg.norm(ts.values, ord=2)
    return ts / two_norm


def lasso(X, y, lamb):
    """Run the ``ll`` solver and label its result with the columns of ``X``.

    Parameters
    ----------
    X : object exposing ``.values`` and ``.columns`` (a DataFrame here)
        Its raw values are passed to the solver as the first argument.
    y : object exposing ``.values`` (a Series here)
        Its raw values are passed to the solver as the second argument.
    lamb : passed unchanged as the solver's third argument
        Presumably the lasso penalty weight — confirm against the
        ``mosekTools.solver.lasso`` documentation.

    Returns
    -------
    pandas.Series
        The solver output, indexed by ``X.columns``.
    """
    weights = ll(X.values, y.values, lamb)
    return pd.Series(data=weights, index=X.columns)

In [5]:
    # Load the price history; the first CSV column is used as the index and
    # parsed as dates.
    data = pd.read_csv("data/data.csv", parse_dates=True, index_col=0)

    # Row-over-row percentage changes of the "GS" column.
    stock = data["GS"]
    r = stock.pct_change()

    # Features: exponentially weighted means of the returns, one column per
    # centre of mass. Each column stays NaN until 30 observations are available.
    X = pd.DataFrame(
        {
            com: r.ewm(min_periods=30, com=com).mean()
            for com in (2, 3, 5, 8, 13, 21, 34, 55, 89)
        }
    )

    # Target: the return one row ahead, since the aim is to predict the next
    # day's return. The final row has no successor and is NaN at this point.
    y = r.shift(-1)

    # Keep rows from the start date onwards, replace remaining NaNs with 0.0
    # (this includes the final target value) and rescale with normalize().
    # NOTE(review): "01-02-2010" is ambiguous (2 Jan vs 1 Feb) — confirm the
    # intended start date.
    X = X.truncate(before="01-02-2010").fillna(0.0).apply(normalize)
    y = normalize(y.truncate(before="01-02-2010").fillna(0.0))

    # Fit one weight per feature column with lamb = 0.005.
    w = lasso(X, y, 0.005)
    print(w)

    # Correlation, on the same rows used for the fit, between the weighted
    # combination of the features and the target.
    fitted = (X * w).sum(axis=1)
    print(np.corrcoef(fitted, y))

2    -6.176222e-02
3     3.563169e-08
5     9.700111e-07
8     2.172137e-05
13    3.731714e-02
21    1.631824e-07
34    1.139879e-07
55   -3.839981e-08
89   -1.590286e-02
dtype: float64
[[ 1.          0.05134544]
 [ 0.05134544  1.        ]]
