In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

Problem 2a)

Creating a plot illustrating the bias-variance tradeoff

In [None]:
# Bootstrap estimate of the bias-variance decomposition for polynomial OLS fits.
# The unseeded scikit-learn helpers below (train_test_split, resample) draw from
# NumPy's global RNG, so this single seed makes the whole cell reproducible.
np.random.seed(42)

# Number of data points and bootstrap resamples
n = 100
bootstraps = 1000

# Data: two Gaussian bumps on [-3, 3] plus independent noise on every point.
# size=n is essential: without it np.random.normal returns ONE scalar, which
# shifts all samples by the same constant instead of adding noise.
x = np.linspace(-3, 3, n)
y = np.exp(-(x**2)) + 1.5 * np.exp(-((x - 2) ** 2)) + np.random.normal(0, 0.1, size=n)

# Split once, before the degree loop, so every polynomial degree is trained and
# evaluated on the same points and the curves are comparable across degrees.
x_train, x_test, y_train, y_test = train_test_split(x.reshape(-1, 1), y, test_size=0.2)

# Polynomial degrees 1 to 9
poly_degrees = range(1, 10)

# Lists for storing one value per polynomial degree
biases = []
variances = []
mses = []

for p in poly_degrees:
    # Design matrices for this degree (PolynomialFeatures only raises x to powers,
    # so transforming after the split gives the same features as before it)
    poly = PolynomialFeatures(p)
    X_train = poly.fit_transform(x_train)
    X_test = poly.transform(x_test)

    # Row b holds the test-set predictions of the model fitted on bootstrap sample b
    predictions = np.zeros((bootstraps, X_test.shape[0]))

    # Bootstrap resampling of the training set
    for b in range(bootstraps):
        X_train_re, y_train_re = resample(X_train, y_train)
        model = LinearRegression()
        predictions[b, :] = model.fit(X_train_re, y_train_re).predict(X_test)

    # Squared bias is measured against the noisy test targets, so it also
    # contains the irreducible noise; variance is taken over bootstrap models.
    mean_prediction = np.mean(predictions, axis=0)
    biases.append(np.mean((y_test - mean_prediction) ** 2))
    variances.append(np.mean(np.var(predictions, axis=0)))
    mses.append(np.mean((y_test - predictions) ** 2))

# Plot (the first curve is the squared bias, so it is labelled as such)
fig, ax = plt.subplots()
ax.plot(poly_degrees, biases, label=r"Bias$^2$")
ax.plot(poly_degrees, variances, label="Variance")
ax.plot(poly_degrees, mses, label="MSE")
ax.set_xlabel("Polynomial Degree")
ax.set_ylabel("Error")
ax.legend()
ax.set_title("Bias–Variance Tradeoff with Bootstrapping")
fig.savefig("Bias-Variance-Tradeoff.pdf")
plt.show()

Problem 2d)

Creating a heatmap showing the MSE of a Ridge regression model for various polynomial degrees and lambda values

In [None]:
# Data: two Gaussian bumps sampled on [-3, 3] with independent noise per point
np.random.seed(42)
n = 100
x = np.linspace(-3, 3, n)
# size=n gives every sample its own noise draw; without it a single scalar is
# drawn and added to all points, which is a constant offset rather than noise.
y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2) + np.random.normal(0, 0.1, size=n)

# Parameter ranges
degrees = np.arange(1, 11)        # polynomial deg 1–10
lambdas = np.logspace(-4, 4, 9)   # lmb values from 1e-4 to 1e4

# Matrix to store MSE values: one row per degree, one column per lambda
MSE = np.zeros((len(degrees), len(lambdas)))

#Ridge
def Ridge_parameters(X, y, lmd):
    """Return the closed-form Ridge regression coefficients.

    Solves the regularised normal equations
    ``(X.T @ X + lmd * I) @ beta = X.T @ y`` for ``beta``.

    Parameters
    ----------
    X : 2-D array
        Design matrix; its second dimension sets the size of the identity term.
    y : array
        Targets, with first dimension matching the rows of ``X``.
    lmd : float
        Regularisation strength added to the diagonal of ``X.T @ X``.

    Returns
    -------
    array
        Coefficient vector ``beta``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the regularised matrix is singular.
    """
    regularised_gram = X.T @ X + lmd * np.identity(X.shape[1])
    # Solve the linear system instead of forming the explicit inverse: same
    # result, but cheaper and numerically more stable for ill-conditioned X.
    return np.linalg.solve(regularised_gram, X.T @ y)


# Fill MSE[i, j] with the test error of Ridge at polynomial degree i and lambda j.

# Train/test split, done once so every (degree, lambda) cell of the heatmap is
# trained and scored on the same points and the cells are directly comparable.
x_train, x_test, y_train, y_test = train_test_split(x.reshape(-1, 1), y, test_size=0.2)

# The design matrix has no bias column and the features are standardised to
# zero mean, so the model itself has no intercept. Centre the targets with the
# training mean and add it back at prediction time; otherwise every MSE value
# is inflated by the squared mean of y.
y_offset = np.mean(y_train)

# Loop over polynomial degrees and lambda values
for i, deg in enumerate(degrees):

    #Design matrix (powers of x only, no constant column)
    poly = PolynomialFeatures(degree=deg, include_bias = False)
    X_train = poly.fit_transform(x_train)
    X_test = poly.transform(x_test)

    #Scale with statistics from the training data only
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    #Loop over lambda
    for j, lmb in enumerate(lambdas):
        beta = Ridge_parameters(X_train_s, y_train - y_offset, lmb)
        y_tilde = X_test_s @ beta + y_offset
        MSE[i, j] = mean_squared_error(y_test, y_tilde)


# Draw the MSE grid as a heatmap: rows follow `degrees`, columns follow `lambdas`
fig, ax = plt.subplots()
im = ax.imshow(MSE, origin="lower")

# One tick per grid cell, labelled with the corresponding lambda / degree value
lambda_positions = range(len(lambdas))
degree_positions = range(len(degrees))
ax.set_xticks(
    lambda_positions,
    labels=lambdas,
    rotation=45,
    ha="right",
    rotation_mode="anchor",
)
ax.set_yticks(degree_positions, labels=degrees)

# Colour scale
cbar = fig.colorbar(im, ax=ax)
cbar.set_label("Mean Squared Error")

# Title and axis labels in a single call
ax.set(
    title="MSE as function of lambda and poly deg",
    xlabel="Lambda",
    ylabel="Polynomial degree",
)

# Tighten the layout, then write the figure to disk and display it
fig.tight_layout()
fig.savefig("Heatmap-Ridge.pdf")
plt.show()