**# Import the required libraries**

In [18]:
from sklearn import datasets
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


**# Load the diabetes dataset**

In [19]:
X, y = datasets.load_diabetes(as_frame=True, scaled=False, return_X_y=True)


**# Create an empty DataFrame to store the cross-validation results**

In [20]:
results_df = pd.DataFrame(columns=['Model Degree', 'R-Squared', 'MAE'])


**# Perform cross-validation on polynomial models**

In [21]:
for degree in range(9):
    poly_features = PolynomialFeatures(degree=degree)
    X_poly = poly_features.fit_transform(X)
    model = LinearRegression()
    scores = cross_val_score(model, X_poly, y, cv=5, scoring='neg_mean_absolute_error')
    mae_scores = -scores  # Convert negative MAE scores to positive
    r2_scores = cross_val_score(model, X_poly, y, cv=5, scoring='r2')

    # Store the mean and standard deviation of R-Squared and MAE
    mean_r2 = np.mean(r2_scores)
    std_r2 = np.std(r2_scores)
    mean_mae = np.mean(mae_scores)
    std_mae = np.std(mae_scores)

    # Append the results to the DataFrame
    results_df = pd.concat([results_df, pd.DataFrame({'Model Degree': [degree],
                                                      'R-Squared': [f"{mean_r2:.2f} ± {std_r2:.2f}"],
                                                      'MAE': [f"{mean_mae:.2f} ± {std_mae:.2f}"]})],
                           ignore_index=True)


**# Print the results table**

In [22]:
print(results_df)


  Model Degree             R-Squared               MAE
0            0          -0.03 ± 0.04      66.05 ± 3.47
1            1           0.48 ± 0.05      44.28 ± 2.10
2            2          -0.35 ± 0.65     63.55 ± 14.95
3            3      -203.42 ± 225.88   342.05 ± 142.44
4            4      -571.08 ± 369.89   657.26 ± 159.48
5            5      -436.86 ± 379.10    562.99 ± 59.92
6            6    -1695.48 ± 2632.42   742.56 ± 191.04
7            7    -5530.89 ± 9518.59  1032.68 ± 393.44
8            8  -16076.26 ± 28049.95  1475.66 ± 706.28


**# Identify the best model based on R-Squared and MAE metrics**

In [24]:
# Convert 'R-Squared' and 'MAE' columns to numeric values
results_df['R-Squared'] = results_df['R-Squared'].str.extract('(\d+\.\d+)')
results_df['MAE'] = results_df['MAE'].str.extract('(\d+\.\d+)')

# Convert the columns to numeric dtype
results_df['R-Squared'] = pd.to_numeric(results_df['R-Squared'])
results_df['MAE'] = pd.to_numeric(results_df['MAE'])

best_model_r2 = results_df['R-Squared'].idxmax()
best_model_mae = results_df['MAE'].idxmin()
best_model = results_df.loc[[best_model_r2, best_model_mae]]
print("\nBest Model:")
print(best_model)




Best Model:
  Model Degree  R-Squared      MAE
8            8   16076.26  1475.66
1            1       0.48    44.28


**# Explanation for Choosing the Specific Model**<br>
-> The model with a polynomial degree of 8 is chosen as the best model based on the highest R-Squared value (**16076.26**) and relatively low Mean Absolute Error (MAE) value (**1475.66**). <br>
-> This model demonstrates the ability to explain a significant amount of variance in the target variable and provides reasonably accurate predictions. <br>-> However, the high degree of complexity in this model may increase the risk of overfitting, so careful consideration should be given to the trade-off between complexity and performance.