Ridge regression
---

In [None]:
import pandas as pd

# Read the CSV of sample points into a DataFrame
data_df = pd.read_csv('data-points.csv')

# Report the DataFrame dimensions
print('Shape:', data_df.shape)

# Preview the first five rows (shown as the cell output)
data_df.head(n=5)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Pull the x and y columns out of the DataFrame as NumPy arrays
x, y = (data_df[column].values for column in ('x', 'y'))

# Scatter plot of the raw observations
plt.scatter(x, y)

# 100 evenly spaced sample points spanning the observed x range,
# and the sine of each one, drawn as a reference curve
x_values = np.linspace(min(x), max(x), num=100)
y_sine = np.sin(x_values)
plt.plot(
    x_values, y_sine,
    color='C3', label='sine curve',
)
plt.legend()
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial features (powers 1..10 of x, no constant column)
poly_obj = PolynomialFeatures(degree=10, include_bias=False)
X_poly = poly_obj.fit_transform(x[:, np.newaxis])

print('Shape:', X_poly.shape)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and only fall back on older installations.
# .tolist() keeps the printed output a plain list of strings.
if hasattr(poly_obj, 'get_feature_names_out'):
    feature_names = poly_obj.get_feature_names_out().tolist()
else:
    feature_names = poly_obj.get_feature_names()
print('Features:', feature_names)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn per-feature scaling statistics from the polynomial features,
# then apply them to obtain the standardized design matrix
scaler = StandardScaler().fit(X_poly)
X_rescaled = scaler.transform(X_poly)

In [None]:
# Per-column mean of the standardized features (expected to be ~0)
np.mean(X_rescaled, axis=0)

In [None]:
# Per-column standard deviation of the standardized features (expected to be ~1)
np.std(X_rescaled, axis=0)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an unregularized linear regression on the standardized features
lr = LinearRegression().fit(X_rescaled, y)

# Run the sample x values through the same polynomial expansion and
# scaling that were fitted on the data, then predict on them
X_values_rescaled = scaler.transform(poly_obj.transform(x_values.reshape(-1, 1)))
y_values_lr = lr.predict(X_values_rescaled)

# Draw the data and the fitted curve against the first rescaled feature
plt.scatter(X_rescaled[:, 0], y)
plt.plot(
    X_values_rescaled[:, 0], y_values_lr,
    color='C3', label='linear regression',
)
plt.legend()
plt.show()

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with scikit-learn's default regularization strength
ridge = Ridge()
ridge.fit(X_rescaled, y)

# Compute predictions for the sample x values
y_values_ridge = ridge.predict(X_values_rescaled)

# Plot the model against the first rescaled feature
plt.scatter(X_rescaled[:, 0], y)
# Legend label fixed: was misspelled as 'rigde ression'
plt.plot(X_values_rescaled[:, 0], y_values_ridge, c='C3', label='ridge regression')
plt.legend()
plt.show()

In [None]:
# Ridge regression again, this time with a much smaller penalty (alpha=1e-4)
ridge2 = Ridge(alpha=1e-4).fit(X_rescaled, y)

# Predictions for the sample x values
y_values_ridge2 = ridge2.predict(X_values_rescaled)

# Draw the data and the fitted curve against the first rescaled feature
plt.scatter(X_rescaled[:, 0], y)
plt.plot(
    X_values_rescaled[:, 0], y_values_ridge2,
    color='C3', label='tuned ridge',
)
plt.legend()
plt.show()

In [None]:
# Linear regression coefficients
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and only fall back on older installations.
if hasattr(poly_obj, 'get_feature_names_out'):
    features = poly_obj.get_feature_names_out().tolist()
else:
    features = poly_obj.get_feature_names()

# One line per feature: name left-aligned, coefficient right-aligned
for feature, coef in zip(features, lr.coef_):
    print('{:<6}: {:>10.1f}'.format(feature, coef))

In [None]:
# Coefficients of the alpha=1e-4 ridge model, one line per feature
for feature, coef in zip(features, ridge2.coef_):
    print(f'{feature:<6}: {coef:>4.1f}')

In [None]:
from sklearn.linear_model import Lasso

# Lasso regression
# max_iter must be an integer: scikit-learn >= 1.2 validates estimator
# parameters and rejects the float 1e5 with InvalidParameterError.
lasso = Lasso(alpha=1e-4, max_iter=100000)
lasso.fit(X_rescaled, y)

# Compute predictions for the sample x values
y_values_lasso = lasso.predict(X_values_rescaled)

# Plot the model against the first rescaled feature
plt.scatter(X_rescaled[:, 0], y)
plt.plot(X_values_rescaled[:, 0], y_values_lasso, c='C3', label='lasso')
plt.legend()
plt.show()

In [None]:
# Coefficients of the lasso model, one line per feature
for feature, coef in zip(features, lasso.coef_):
    print(f'{feature:<6}: {coef:>4.1f}')