In [111]:
#Importing the libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

In [112]:
#Loading the dataset
#
# `datasets` is imported in the import cell at the top of the notebook.
#   return_X_y=True -> the loader returns a (features, target) pair
#   scaled=False    -> request the unscaled feature values
#   as_frame=True   -> request pandas objects rather than NumPy arrays
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True, scaled=False, as_frame=True)

In [131]:

# Cross-validated comparison of polynomial regression models of increasing
# degree. One row of summary statistics is collected per degree.
cross_val_scores = []
degrees = np.arange(0,9)

# Metrics evaluated for every candidate degree. The error metrics are
# requested through their negated "neg_*" scorers; the sign is flipped back
# below so that the stored values are ordinary positive errors.
scoring = {
    'r2': 'r2',
    'mse': 'neg_mean_squared_error',
    'mae': 'neg_mean_absolute_error',
    'mape': 'neg_mean_absolute_percentage_error',
}

# Perform cross-validation for each degree
for degree in degrees:
    # Create a polynomial regression model for the current degree
    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())

    # Score all four metrics in a single 5-fold cross-validation run, so the
    # pipeline is fitted once per fold rather than once per fold per metric.
    cv_results = cross_validate(model, diabetes_X, diabetes_y, cv=5, scoring=scoring)

    r_squared = cv_results['test_r2']
    mean_r2 = r_squared.mean()
    std_r2 = r_squared.std()
    mse = -cv_results['test_mse']
    mean_mse = mse.mean()
    std_mse = mse.std()
    mae = -cv_results['test_mae']
    mean_mae = mae.mean()
    std_mae = mae.std()
    mape = -cv_results['test_mape']
    mean_mape = mape.mean()
    std_mape = mape.std()

    # Append the scores to the list. The order of the entries must match the
    # column names used when the list is converted to a DataFrame.
    cross_val_scores.append([degree, mean_r2, std_r2, mean_mae, std_mae, mean_mape, std_mape, mean_mse, std_mse])


In [132]:
# Converting the list to a DataFrame
# (pandas is imported as `pd` in the import cell at the top of the notebook.)
# The column names follow the order in which each row of cross_val_scores
# was assembled: degree first, then a mean/std pair per metric.
score_columns = [
    'Degree',
    'Mean R-Squared', 'STD R-Squared',
    'Mean MAE', 'STD MAE',
    'Mean MAPE', 'STD MAPE',
    'Mean MSE', 'STD MSE',
]
df = pd.DataFrame(cross_val_scores, columns=score_columns)
df


Unnamed: 0,Degree,Mean R-Squared,STD R-Squared,Mean MAE,STD MAE,Mean MAPE,STD MAPE,Mean MSE,STD MSE
0,0,-0.027506,0.036772,66.045624,3.47466,0.623622,0.06561,5982.413,547.2524
1,1,0.482316,0.049269,44.276499,2.10011,0.39486,0.029602,2993.081,150.771
2,2,-0.841362,1.656913,68.304484,20.620472,0.583773,0.114236,10379.47,8439.139
3,3,-199.796464,226.976489,330.432572,147.820363,2.261685,0.922023,1147209.0,1235182.0
4,4,-571.083108,369.891883,657.260477,159.475902,5.232018,0.876845,3402772.0,2322364.0
5,5,-436.856887,379.100423,562.993636,59.917202,4.570284,0.922352,2644547.0,2458059.0
6,6,-1696.799731,2635.123484,742.694951,191.065386,6.187042,2.557326,10581130.0,16812540.0
7,7,-5530.894074,9518.586954,1032.681725,393.439617,8.666399,5.149762,34758830.0,60562690.0
8,8,-16076.255118,28049.952581,1475.658532,706.280274,12.474617,9.111319,101245000.0,178350100.0


In [134]:

# Select the best degree for each metric straight from the results table.
# idxmax()/idxmin() give the row label of the best entry and .loc reads that
# row's 'Degree' value, so the lookup does not rely on the degree values
# happening to coincide with the DataFrame's row labels.
best_r2_model = df.loc[df['Mean R-Squared'].idxmax(), 'Degree']   # higher R-squared is better
best_mae_model = df.loc[df['Mean MAE'].idxmin(), 'Degree']        # lower error is better
best_mse_model = df.loc[df['Mean MSE'].idxmin(), 'Degree']
best_mape_model = df.loc[df['Mean MAPE'].idxmin(), 'Degree']

print(f"\nBest model based on R-Squared: Degree {best_r2_model}")
print(f"Best model based on MAE: Degree {best_mae_model}")
print(f"Best model based on MSE: Degree {best_mse_model}")
print(f"Best model based on MAPE: Degree {best_mape_model}")


Best model based on R-Squared: Degree 1
Best model based on MAE: Degree 1
Best model based on MSE: Degree 1
Best model based on MAPE: Degree 1


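Based on the cross-validation results, the degree-1 model has the highest mean R-squared value (0.48) and the lowest mean MAE, MSE and MAPE. For every other degree the mean R-squared is negative and the MSE, MAE and MAPE are considerably larger (by several orders of magnitude at the highest degrees), which indicates that those models perform poorly on held-out data: degree 0 cannot capture the relationship at all, while degrees 2 and above appear to overfit the training folds.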

In [123]:
#Fitting degree 1 model to the diabetes data

# Combine the degree-1 feature expansion (without a bias column) and the
# linear regressor into one pipeline, so the expansion is applied
# automatically every time `model` is fitted or asked for predictions.
cross_valid = PolynomialFeatures(degree=1, include_bias=False)
model = make_pipeline(cross_valid, LinearRegression())

In [124]:
# Train the selected model on the complete dataset (no rows are held out here).
model.fit(diabetes_X, diabetes_y)

In [126]:
# Predict on the same feature frame that was passed to model.fit, so these
# are in-sample predictions.
ypred = model.predict(diabetes_X)

In [128]:
# R-squared of the in-sample predictions, printed with two decimals.
# NOTE: ypred comes from the data the model was fitted on, so this is a
# training-set score and is not directly comparable to the cross-validated
# mean R-squared reported in the results table.
print('R-Squared value: %.2f' % r2_score(diabetes_y,ypred))

R-Squared value: 0.52
