In [1]:
# Standard includes
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
matplotlib.rc('xtick', labelsize=14) 
matplotlib.rc('ytick', labelsize=14)

In [2]:
# Now a higher-dimensional data set, but not enough points
# Generate synthetic data
n = 101
d = 100
noisestd = 1.0
#
x = np.matrix(np.random.normal(size=(n,d)))
w = np.matrix(np.zeros((d,1)))
for i in range(0,10):
    w[i] = 1.0
e = np.matrix(np.random.normal(size=(n,1)) * noisestd)
#
y = x * w + e
#
# Also generate a test set
tx = np.matrix(np.random.normal(size=(n,d)))
te = np.matrix(np.random.normal(size=(n,1)) * noisestd)
#
ty = tx * w + te

In [12]:
np.var(ty)

12.372237559711175

In [4]:
# Now regress y on x using least-squares
regr = linear_model.LinearRegression()
regr.fit(x, y)

# Print coefficients
print('Slope, intercept: ', regr.coef_, regr.intercept_)

# Make predictions using the model and compute squared error
y_pred = regr.predict(x)
print("Mean squared error on training set: %.2f" % mean_squared_error(y, y_pred))

# Make predictions using the model and compute squared error
ty_pred = regr.predict(tx)
print("Mean squared error on test set: %.2f" % mean_squared_error(ty, ty_pred))

('Slope, intercept: ', array([[ -5.52853618e+00,   1.26704008e+00,  -4.31901424e-01,
          3.31725884e-03,   2.69288020e+00,   2.26205977e+00,
          1.73752428e+00,   4.51496823e+00,   2.72745611e+00,
          3.20530459e+00,  -4.32057229e-01,   1.02909995e+00,
         -2.89958392e+00,   7.80318659e-01,  -1.12199120e+00,
         -3.32010151e-01,  -7.69629041e-01,  -2.31797681e+00,
          2.95538636e+00,  -1.22842887e+00,  -3.97132298e-01,
          2.53977186e-01,   1.89296425e+00,   1.05887633e+00,
         -8.10042486e-02,   2.56009617e-01,   4.02357292e+00,
          1.56417472e+00,  -2.36046955e-01,   1.91266005e+00,
          2.73729389e+00,   2.78174443e-01,   4.81500190e+00,
         -2.55250807e-01,   3.28416527e-01,   2.23016826e+00,
         -2.50798798e+00,  -4.41417201e-01,   5.64501874e+00,
         -3.45475282e+00,  -4.21234149e+00,   2.39003427e+00,
         -3.32089696e-01,  -9.39512360e-01,  -1.90441984e+00,
         -1.24951880e+00,   1.55415137e+00,  -9

In [23]:
# Now regress y on x using ridge regression
regr = linear_model.Ridge(alpha=0.1)
regr.fit(x, y)

# Print coefficients
print('Slope, intercept: ', regr.coef_, regr.intercept_)

# Make predictions using the model and compute squared error
y_pred = regr.predict(x)
print("Mean squared error on training set: %.2f" % mean_squared_error(y, y_pred))

# Make predictions using the model and compute squared error
ty_pred = regr.predict(tx)
print("Mean squared error on test set: %.2f" % mean_squared_error(ty, ty_pred))

('Slope, intercept: ', array([[  1.87991523e-01,   7.14063761e-01,   8.84440708e-01,
          1.37247155e+00,   8.33902669e-01,   1.82161074e+00,
          1.28437674e+00,   8.77802850e-01,   1.46028448e+00,
          9.32121222e-01,  -3.86183809e-02,  -1.19841029e-01,
         -3.28805027e-01,  -6.27741145e-01,  -5.01693384e-01,
         -1.90151464e-01,  -2.33773876e-01,  -7.76942793e-02,
         -1.47309246e-01,  -5.31419209e-03,   1.27396412e-01,
         -3.48590162e-01,   1.63163858e-01,  -2.27145535e-01,
         -3.31099315e-01,   5.27893547e-02,   3.03351200e-01,
          2.08888604e-01,   2.61949325e-01,   3.38812173e-01,
          4.60034431e-01,  -2.98211740e-01,  -2.03867438e-01,
         -9.68371293e-01,   6.79640587e-02,   2.45506167e-01,
          1.30994685e-01,  -5.70125420e-02,  -2.22910549e-01,
         -4.25785376e-01,  -4.57812585e-01,   3.18188372e-01,
         -1.22484173e-01,   5.64568708e-02,  -2.73624797e-01,
         -1.88190977e-01,   2.48030660e-01,   1

In [25]:
values = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]

for v in values:
    regr = linear_model.Ridge(alpha=v)
    regr.fit(x, y)
    print ("Lambda: ", v)

    # Make predictions on training set
    y_pred = regr.predict(x)
    print("Mean squared error on training set: %.2f" % mean_squared_error(y, y_pred))

    # Make predictions using the model and compute squared error
    ty_pred = regr.predict(tx)
    print("Mean squared error on test set: %.2f" % mean_squared_error(ty, ty_pred))


('Lambda: ', 1e-05)
Mean squared error on training set: 0.00
Mean squared error on test set: 585.81
('Lambda: ', 0.0001)
Mean squared error on training set: 0.00
Mean squared error on test set: 564.28
('Lambda: ', 0.001)
Mean squared error on training set: 0.00
Mean squared error on test set: 404.08
('Lambda: ', 0.01)
Mean squared error on training set: 0.01
Mean squared error on test set: 83.48
('Lambda: ', 0.1)
Mean squared error on training set: 0.03
Mean squared error on test set: 19.26
('Lambda: ', 1.0)
Mean squared error on training set: 0.07
Mean squared error on test set: 7.02
('Lambda: ', 10.0)
Mean squared error on training set: 0.35
Mean squared error on test set: 2.84
('Lambda: ', 100.0)
Mean squared error on training set: 2.40
Mean squared error on test set: 5.79
('Lambda: ', 1000.0)
Mean squared error on training set: 8.19
Mean squared error on test set: 10.97
('Lambda: ', 10000.0)
Mean squared error on training set: 10.83
Mean squared error on test set: 12.63


In [31]:
# Now regress y on x using Lasso
regr = linear_model.Lasso(alpha=0.2)
regr.fit(x, y)

# Print coefficients
print('Slope, intercept: ', regr.coef_, regr.intercept_)

# Make predictions using the model and compute squared error
y_pred = regr.predict(x)
print("Mean squared error on training set: %.2f" % mean_squared_error(y, y_pred))

# Make predictions using the model and compute squared error
ty_pred = regr.predict(tx)
print("Mean squared error on test set: %.2f" % mean_squared_error(ty, ty_pred))

('Slope, intercept: ', array([ 0.71684698,  0.77843527,  0.74401769,  0.77285114,  0.80797283,
        0.95866651,  0.66708465,  0.857495  ,  0.89680954,  0.69326906,
       -0.        ,  0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        ,  0.08008983,  0.        ,  0.        , -0.        ,
       -0.        ,  0.        ,  0.        , -0.        , -0.        ,
        0.        ,  0.        , -0.     

In [27]:
values = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]

for v in values:
    regr = linear_model.Lasso(alpha=v)
    regr.fit(x, y)
    print ("Lambda: ", v)

    # Make predictions on training set
    y_pred = regr.predict(x)
    print("Mean squared error on training set: %.2f" % mean_squared_error(y, y_pred))

    # Make predictions using the model and compute squared error
    ty_pred = regr.predict(tx)
    print("Mean squared error on test set: %.2f" % mean_squared_error(ty, ty_pred))



('Lambda: ', 1e-05)
Mean squared error on training set: 0.02
Mean squared error on test set: 32.44
('Lambda: ', 0.0001)
Mean squared error on training set: 0.02
Mean squared error on test set: 28.79
('Lambda: ', 0.001)
Mean squared error on training set: 0.03
Mean squared error on test set: 14.82
('Lambda: ', 0.01)
Mean squared error on training set: 0.15
Mean squared error on test set: 3.08
('Lambda: ', 0.1)
Mean squared error on training set: 0.80
Mean squared error on test set: 1.47
('Lambda: ', 1.0)
Mean squared error on training set: 8.86
Mean squared error on test set: 11.07
('Lambda: ', 10.0)
Mean squared error on training set: 11.24
Mean squared error on test set: 12.86
('Lambda: ', 100.0)
Mean squared error on training set: 11.24
Mean squared error on test set: 12.86
('Lambda: ', 1000.0)
Mean squared error on training set: 11.24
Mean squared error on test set: 12.86
('Lambda: ', 10000.0)
Mean squared error on training set: 11.24
Mean squared error on test set: 12.86


