In [2]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Load the diabetes dataset
X, y = datasets.load_diabetes(as_frame=True, scaled=False, return_X_y=True)

# Perform cross-validation on polynomial models ranging from degree 0 to 8
cv_results = []
for degree in range(9):
    if degree == 0:
        poly_features = PolynomialFeatures(degree=1, include_bias=True)
    else:
        poly_features = PolynomialFeatures(degree=degree, include_bias=False)
    

    X_poly = poly_features.fit_transform(X)

    linear_model = LinearRegression()

    scores = cross_val_score(linear_model, X_poly, y, cv=5, scoring='r2')
    mean_r2 = scores.mean()
    mean_mae = -cross_val_score(linear_model, X_poly, y, cv=5, scoring='neg_mean_absolute_error').mean()
    cv_results.append((degree, mean_r2, mean_mae))




This code performs cross-validation on polynomial models of different degrees ranging from 0 to 8. For each degree, it creates a polynomial feature transformer using PolynomialFeatures from scikit-learn. If the degree is 0, it sets degree=1 and includes the bias term by setting include_bias=True to avoid an empty output array.

The cross_val_score function is used to perform cross-validation on the model. It computes the R-Squared scores by default. The mean R-Squared score and mean MAE are calculated and stored in the cv_results list for each degree.

In [3]:
# Construct a table summarizing the cross-validation results
print("Model\t\tR-Squared\t\tMAE\n")
for result in cv_results:
    print(f"Degree {result[0]}:\t{result[1]:.3f}\t\t{result[2]:.3f}")


Model		R-Squared		MAE

Degree 0:	0.482		44.276
Degree 1:	0.482		44.276
Degree 2:	0.392		46.613
Degree 3:	-158.689		273.485
Degree 4:	-571.083		657.260
Degree 5:	-436.857		562.994
Degree 6:	-1696.116		742.981
Degree 7:	-5530.894		1032.682
Degree 8:	-16076.255		1475.659


It prints a table summarizing the cross-validation results. It displays the degree, R-Squared score, and MAE for each model.



In [4]:

# Identify the best model based on R-Squared and MAE metrics
best_model = max(cv_results, key=lambda x: x[1])
best_degree = best_model[0]
best_r2 = best_model[1]
best_mae = best_model[2]

print("\nBest Model:")
print(f"Degree: {best_degree}")
print(f"R-Squared: {best_r2:.3f}")
print(f"MAE: {best_mae:.3f}")


Best Model:
Degree: 0
R-Squared: 0.482
MAE: 44.276


It identifies the best model by finding the maximum R-Squared score. It retrieves the degree, R-Squared score, and MAE of the best model from the cv_results list. 