#### Build a LASSO Model

In [18]:
import pandas as pd
from sklearn.linear_model import LassoLarsIC
from lib.munge import add_lags, split_sample_bytime

## 1. Load the rates frame that visualize.ipynb serialized to disk.
##    (Unpickling can execute code: only load files this project wrote.)
data = pd.read_pickle('../data/rates.p')

## 2. Append up to 3 lags of the series, then discard the rows left
##    incomplete by the lagging or by holidays.
lagged = add_lags(data, 3)
lagged = lagged.dropna()

## 3. Make the index timezone-aware (UTC) timestamps, then carve out the
##    training and holdout samples with split_sample_bytime.
lagged.index = pd.to_datetime(lagged.index, utc=True)
X_train, y_train, X_holdout, y_holdout = split_sample_bytime(lagged)

## 4. Fit a LASSO-LARS model using the BIC criterion.
##    NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and
##    removed in 1.2 -- this call needs an older scikit-learn; confirm the
##    pinned version before re-running.
model = LassoLarsIC(
    criterion='bic',
    normalize=True,
    positive=False,
).fit(X_train, y_train)

#### Compare the training set error and the out-of-time holdout error.

In [19]:
# Score the fitted model on the data it was trained on and on the holdout
# data, then show both R^2 values side by side, rounded to 3 decimals.
R2_train = model.score(X_train, y_train)
R2_holdout = model.score(X_holdout, y_holdout)

R2_table = pd.DataFrame(
    [[R2_train, R2_holdout]],
    index=['R^2'],
    columns=['Training Set', 'Holdout Set'],
)
R2_table = R2_table.round(3)
R2_table

Unnamed: 0,Training Set,Holdout Set
R^2,0.979,0.942


In [41]:
# Put the model's holdout predictions next to the observed holdout target so
# both series can be drawn on one set of axes.
pred_vs_act = pd.DataFrame({'Predicted': model.predict(X_holdout),
                            'Actual': y_holdout})

# Title the figure so it stands alone when the notebook is skimmed; the
# trailing semicolon keeps the Axes repr out of the cell output.
pred_vs_act.plot(title='Holdout set: predicted vs. actual');

<matplotlib.axes._subplots.AxesSubplot at 0x7f170678b710>

#### Print the coefficients of the LASSO model.

In [27]:
# One row per training feature, holding the coefficient the fitted model
# assigned to it.
coefficients = pd.DataFrame(data=model.coef_,
                            index=X_train.columns,
                            columns=['Coefficient'])

# Coefficients that are exactly zero are replaced by the string form of their
# rounded value; every other coefficient is passed through unchanged.
# (Presumably a display choice to make the dropped features easy to spot.)
coefficients = coefficients.applymap(
    lambda coef: coef if coef != 0 else str(round(coef))
)
coefficients

Unnamed: 0,Coefficient
15-Year Mortgage,0.281132
5/1 Adjustable Rate Mortgage,0.0
Origination Fees/Discounts for 30-Year Mortgage,0.0
Origination Fees/Discounts for 15-Year Mortgage,0.0
Origination Fees/Discounts for 5/1 Adjustable Rate Mortgage,0.0
30-Year Mortgage lag1,0.680522
30-Year Mortgage lag2,0.0
30-Year Mortgage lag3,0.0
15-Year Mortgage lag1,0.0
15-Year Mortgage lag2,0.0
